Latest updates

This commit is contained in:
sanj 2024-07-22 12:19:31 -07:00
parent 52032e4084
commit 3eea22172f
31 changed files with 2487 additions and 1231 deletions

View file

@ -0,0 +1,69 @@
[
{
"Land Owner": "SIERRA PACIFIC INDUSTRIES",
"Location": "HBM: T2N R3E S17 ; HBM: \nT2N R3E S18 ; HBM: T2N \nR3E S7 ; HBM: T2N R3E S8",
"PLSS Coordinates": [
"HBM: T2N R3E S17",
"HBM: T2N R3E S8"
]
},
{
"Land Owner": "1/29/2024\n2/8/2024\n \n7/12/2024\n \n515.00\nBALLARD RESERVOIR\n(5526.510202);GRAVEN \nRESERVOIR\n(5526.510301);RALSTON \nGULCH(5526.510201)\nMDBM: T41N R10E S25 ; \nMDBM: T41N R10E S26 ; \nMDBM: T41N R10E S27 ; \nMDBM: T41N R10E S28 ; \nMDBM: T41N R10E S33 ; \nMDBM: T41N R10E S34 ; \nMDBM: T41N R10E S35 ; \nMDBM: T41N R10E S36 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY",
"Location": "MDBM:",
"PLSS Coordinates": []
},
{
"Land Owner": "2/16/2024\n2/22/2024\n \n7/26/2024\n \n520.00\nARMENTROUT FLAT \n(5526.620003)\n(5526.620003);JIMMERSON \nSPRING\n(5526.610104);MOSQUITO \nLAKE(5526.420403)\nMDBM: T40N R5E S13 ; \nMDBM: T40N R5E S14 ; \nMDBM: T40N R5E S22 ; \nMDBM: T40N R5E S23 ; \nMDBM: T40N R5E S24 ; \nMDBM: T40N R5E S25 ; \nMDBM: T40N R5E S26 ; \nMDBM: T40N R5E S36 ; \nMDBM: T40N R6E S19 ; \nMDBM: T40N R6E S30 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \nPage: \n7\n of \n10\n\nTimber Harvesting Plans\nHarvest Document\nReceived\nFiled\nApproval\nTentative \nEnd of \nPublic \nComments\nPublic \nComment \nPeriod \nClosed\nTotal\nAcres\nWatershed\nLocation\nLand Owner(s)",
"Location": "MDBM:",
"PLSS Coordinates": []
},
{
"Land Owner": "3/5/2024\n3/14/2024\n6/25/2024\n6/21/2024\n6/21/2024\n968.00\nLOWER BUTTE CREEK\n(5526.360103);POISON LAKE\n(5526.360201)\nMDBM: T33N R7E S16 ; \nMDBM: T33N R7E S17 ; \nMDBM: T33N R7E S18 ; \nMDBM: T33N R7E S19 ; \nMDBM: T33N R7E S20 ; \nMDBM: T33N R7E S33 ; \nMDBM: T33N R7E S4 ; \nMDBM: T33N R7E S5 ; \nMDBM: T33N R7E S8 ; \nMDBM: T33N R7E S9 ; \nMDBM: T34N R7E S27 ; \nMDBM: T34N R7E S33 ; \nMDBM: T34N R7E S34 ; \nMDBM: T34N R7E S35 ; \nMDBM: T34N R7E S36 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY",
"Location": "MDBM:",
"PLSS Coordinates": []
},
{
"Land Owner": "5/13/2024\n5/23/2024\n \n7/29/2024\n \n351.00\nCEDAR CREEK (1106.400710)\n(1106.400710);COPPER \nCREEK (1106.400704)\n(1106.400704);SQUIRREL \nGULCH(1106.400701)\nMDBM: T36N R7W S3 ; \nMDBM: T37N R7W S21 ; \nMDBM: T37N R7W S22 ; \nMDBM: T37N R7W S27 ; \nMDBM: T37N R7W S33 ; \nMDBM: T37N R7W S35 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY",
"Location": "MDBM:",
"PLSS Coordinates": []
},
{
"Land Owner": "4/10/2024\n4/18/2024\n \n5/13/2024\n \n362.00\nCHASE CREEK (8638.000201)\n(8638.000201)\nMDBM: T34N R12E S21 ; \nMDBM: T34N R12E S22 ; \nMDBM: T34N R12E S27 ; \nMDBM: T34N R12E S28 ; \nMDBM: T34N R12E S33 ; \nMDBM: T34N R12E S34 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \nPage: \n8\n of \n10\n\nTimber Harvesting Plans\nHarvest Document\nReceived\nFiled\nApproval\nTentative \nEnd of \nPublic \nComments\nPublic \nComment \nPeriod \nClosed\nTotal\nAcres\nWatershed\nLocation\nLand Owner(s)",
"Location": "MDBM:",
"PLSS Coordinates": []
},
{
"Land Owner": "6/28/2024\n7/3/2024\n \n7/29/2024\n \n500.00\nMCCARTY CREEK\n(5509.630203);PANTHER \nSPRING\n(5509.630202);REFUGE\n(5509.630201)\nMDBM: T27N R2E S1 ; \nMDBM: T27N R2E S10 ; \nMDBM: T27N R2E S11 ; \nMDBM: T27N R2E S2 ; \nMDBM: T27N R2E S3 ; \nMDBM: T27N R3E S5 ; \nMDBM: T27N R3E S6 ; \nMDBM: T28N R2E S34 ; \nMDBM: T28N R2E S35 ; \nMDBM: T28N R2E S36 ; \nMDBM: T28N R3E S31 ; \nMDBM: T28N R3E S32 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY",
"Location": "MDBM:",
"PLSS Coordinates": []
},
{
"Land Owner": "SIERRA PACIFIC INDUSTRIES",
"Location": "MDBM: T32N R1E S12 ; \nMDBM: T32N R1E S13 ; \nMDBM: T32N R1E S14 ; \nMDBM: T32N R1E S23 ; \nMDBM: T32N R1E S24 ; \nMDBM: T32N R2E S18 ; \nMDBM: T32N R2E S19 ; \nMDBM: T32N R2E S7",
"PLSS Coordinates": [
"MDBM: T32N R1E S12",
"MDBM: T32N R1E S13",
"MDBM: T32N R1E S14",
"MDBM: T32N R1E S23",
"MDBM: T32N R1E S24",
"MDBM: T32N R2E S18",
"MDBM: T32N R2E S19",
"MDBM: T32N R2E S7"
]
},
{
"Land Owner": "7/19/2024\n \n \n8/5/2024\n \n562.00\nBILLIE GULCH\n(1106.400703);CLAWTON \nGULCH\n(1106.400804);HATCHET \nCREEK(1106.400705);HAY \nGULCH\n(1106.400808);NELSON \nCREEK (1106.400702)\n(1106.400702)\nMDBM: T36N R7W S13 ; \nMDBM: T36N R7W S23 ; \nMDBM: T36N R7W S25 ; \nMDBM: T36N R7W S27 ; \nMDBM: T36N R7W S33 ; \nMDBM: T36N R7W S34 ; \nMDBM: T36N R7W S35 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \n2-24NTMP-00004-SHA\n7/19/2024\n \n \n9/2/2024\n \n480.00\nLOWER SODA CREEK\n(5525.210202)\nMDBM: T38N R4W S11 ; \nMDBM: T38N R4W S12 ; \nMDBM: T38N R4W S14 ; \nMDBM: T38N R4W S2 \nCASTLE CRAGS LLC",
"Location": "MDBM: T38N R4W S11 ; \nMDBM: T38N R4W S12 ; \nMDBM: T38N R4W S14 ; \nMDBM: T38N R4W S2 \nCASTLE CRAGS LLC",
"PLSS Coordinates": [
"MDBM: T38N R4W S11",
"MDBM: T38N R4W S12",
"MDBM: T38N R4W S14",
"MDBM: T38N R4W S2"
]
},
{
"Land Owner": "5/31/2024\n6/6/2024\n \n7/19/2024\n \n497.00\nLITTLE SILVER CREEK\n(5514.330206);PEAVINE \nCREEK(5514.330101);UNION \nVALLEY RESERVOIR\n(5514.340301)\nMDBM: T12N R14E S28 ; \nMDBM: T12N R14E S29 ; \nMDBM: T12N R14E S32 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \n81\n7/22/2024 12:18:13 PM\nPage: \n10\n of \n10",
"Location": "MDBM:",
"PLSS Coordinates": []
}
]

View file

@ -1,16 +1,12 @@
# __init__.py
import os
import json
import yaml
from pathlib import Path
import ipaddress
import multiprocessing
from dotenv import load_dotenv
from dateutil import tz
from pathlib import Path
from pydantic import BaseModel
from typing import List, Optional
from .classes import AutoResponder, IMAPConfig, SMTPConfig, EmailAccount, EmailContact, IncomingEmail, Database, Geocoder, APIConfig, Configuration
from .classes import Database, Geocoder, APIConfig, Configuration
from .logs import Logger
# Initialization
@ -25,15 +21,15 @@ L = Logger("Central", LOGS_DIR)
# API essentials
API = APIConfig.load('api', 'secrets')
Dir = Configuration.load('dirs')
HOST = f"{API.BIND}:{API.PORT}"
HOST = f"{API.BIND}:{API.PORT}"
LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count())
DB = Database.from_env()
News = Configuration.load('news', 'secrets')
IMG = Configuration.load('img', 'secrets')
News = Configuration.load('news', 'secrets')
Scrape = Configuration.load('scrape', 'secrets', Dir)
# Directories & general paths
ROUTER_DIR = BASE_DIR / "routers"
@ -98,7 +94,6 @@ SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 16384))
SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
# Stable diffusion
IMG_DIR = DATA_DIR / "img" / "images"
os.makedirs(IMG_DIR, exist_ok=True)
@ -130,7 +125,6 @@ TTS_SEGMENTS_DIR = TTS_DIR / 'segments'
os.makedirs(TTS_SEGMENTS_DIR, exist_ok=True)
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
# Calendar & email account
MS365_TOGGLE = True if os.getenv("MS365_TOGGLE") == "True" else False
ICAL_TOGGLE = True if os.getenv("ICAL_TOGGLE") == "True" else False
@ -185,19 +179,17 @@ CADDY_API_KEY = os.getenv("CADDY_API_KEY")
# Microsoft Graph
MS365_CLIENT_ID = os.getenv('MS365_CLIENT_ID')
MS365_SECRET = os.getenv('MS365_SECRET')
MS365_TENANT_ID = os.getenv('MS365_TENANT_ID')
MS365_TENANT_ID = os.getenv('MS365_TENANT_ID')
MS365_CERT_PATH = CONFIG_DIR / 'MS365' / '.cert.pem' # deprecated
MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated
MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated
MS365_KEY = MS365_KEY_PATH.read_text()
MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt'
MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT')
MS365_LOGIN_URL = os.getenv("MS365_LOGIN_URL", "https://login.microsoftonline.com")
MS365_AUTHORITY_URL = f"{MS365_LOGIN_URL}/{MS365_TENANT_ID}"
MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect")
MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access').split(',')
# Maintenance
GARBAGE_COLLECTION_INTERVAL = 60 * 60 # Run cleanup every hour
GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours
GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours

View file

@ -28,8 +28,18 @@ parser.add_argument('--test', type=str, help='Load only the specified module.')
args = parser.parse_args()
L.setup_from_args(args)
print(f"Debug modules after setup: {L.debug_modules}") # Debug print
print(f"Debug modules after setup: {L.debug_modules}")
logger = L.get_module_logger("main")
def debug(text: str): logger.debug(text)
debug(f"Debug message.")
def info(text: str): logger.info(text)
info(f"Info message.")
def warn(text: str): logger.warning(text)
warn(f"Warning message.")
def err(text: str): logger.error(text)
err(f"Error message.")
def crit(text: str): logger.critical(text)
crit(f"Critical message.")
app = FastAPI()
app.add_middleware(
@ -54,13 +64,13 @@ class SimpleAPIKeyMiddleware(BaseHTTPMiddleware):
if api_key_header:
api_key_header = api_key_header.lower().split("bearer ")[-1]
if api_key_header not in API.KEYS and api_key_query not in API.KEYS:
L.ERR(f"Invalid API key provided by a requester.")
err(f"Invalid API key provided by a requester.")
return JSONResponse(
status_code=401,
content={"detail": "Invalid or missing API key"}
)
response = await call_next(request)
# L.DEBUG(f"Request from {client_ip} is complete")
# debug(f"Request from {client_ip} is complete")
return response
# Add the middleware to your FastAPI app
@ -68,8 +78,8 @@ app.add_middleware(SimpleAPIKeyMiddleware)
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
L.ERR(f"HTTP Exception: {exc.status_code} - {exc.detail}")
L.ERR(f"Request: {request.method} {request.url}")
err(f"HTTP Exception: {exc.status_code} - {exc.detail}")
err(f"Request: {request.method} {request.url}")
return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
@ -106,8 +116,8 @@ def main(argv):
if args.test:
load_router(args.test)
else:
L.logger.critical(f"sijapi launched")
L.logger.critical(f"Arguments: {args}")
crit(f"sijapi launched")
crit(f"Arguments: {args}")
for module_name in API.MODULES.__fields__:
if getattr(API.MODULES, module_name):
load_router(module_name)

View file

@ -1,36 +1,27 @@
# classes.py
import asyncio
import json
import yaml
import math
import os
import re
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, TypeVar, Type
from zoneinfo import ZoneInfo
import aiofiles
import aiohttp
import asyncpg
import reverse_geocoder as rg
import yaml
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, TypeVar
from dotenv import load_dotenv
from pydantic import BaseModel, Field, create_model
from srtm import get_data
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager
from datetime import datetime, timedelta, timezone
from timezonefinder import TimezoneFinder
from zoneinfo import ZoneInfo
from srtm import get_data
T = TypeVar('T', bound='Configuration')
import os
from pathlib import Path
from typing import Union, Optional, Any, Dict, List
import yaml
import re
from pydantic import BaseModel, create_model
from dotenv import load_dotenv
class Configuration(BaseModel):
HOME: Path = Path.home()
_dir_config: Optional['Configuration'] = None
@ -40,38 +31,50 @@ class Configuration(BaseModel):
yaml_path = cls._resolve_path(yaml_path, 'config')
if secrets_path:
secrets_path = cls._resolve_path(secrets_path, 'config')
try:
with yaml_path.open('r') as file:
config_data = yaml.safe_load(file)
print(f"Loaded configuration data from {yaml_path}")
if secrets_path:
with secrets_path.open('r') as file:
secrets_data = yaml.safe_load(file)
print(f"Loaded secrets data from {secrets_path}")
config_data.update(secrets_data)
# If config_data is a list, apply secrets to each item
if isinstance(config_data, list):
for item in config_data:
if isinstance(item, dict):
item.update(secrets_data)
else:
config_data.update(secrets_data)
# If config_data is a list, create a dict with a single key
if isinstance(config_data, list):
config_data = {"configurations": config_data}
# Ensure HOME is set
if config_data.get('HOME') is None:
config_data['HOME'] = str(Path.home())
print(f"HOME was None in config, set to default: {config_data['HOME']}")
load_dotenv()
instance = cls.create_dynamic_model(**config_data)
instance._dir_config = dir_config or instance
resolved_data = instance.resolve_placeholders(config_data)
instance = cls.create_dynamic_model(**resolved_data)
instance._dir_config = dir_config or instance
return instance
except Exception as e:
print(f"Error loading configuration: {str(e)}")
raise
@classmethod
def _resolve_path(cls, path: Union[str, Path], default_dir: str) -> Path:
base_path = Path(__file__).parent.parent # This will be two levels up from this file
@ -92,28 +95,50 @@ class Configuration(BaseModel):
else:
return data
def resolve_placeholders(self, data: Any) -> Any:
    """Recursively resolve placeholders in a nested configuration value.

    Strings are handed to ``resolve_string_placeholders``; lists are
    resolved element by element; any other non-dict value is returned
    unchanged. Every dict (at any nesting level) is resolved value by
    value and then has its ``HOME``, ``SIJAPI`` and ``DATA`` keys set to
    string paths derived from the home directory.

    :param data: A dict, list, string, or any other value.
    :return: A new structure of the same shape with placeholders resolved.
    """
    if isinstance(data, str):
        return self.resolve_string_placeholders(data)
    if isinstance(data, list):
        return [self.resolve_placeholders(item) for item in data]
    if not isinstance(data, dict):
        return data

    resolved = {}
    for key, value in data.items():
        resolved[key] = self.resolve_placeholders(value)

    # Special handling for directory placeholders: a HOME entry in the
    # dict wins over the instance default, and the project and data
    # directories are derived from it.
    home_dir = Path(resolved.get('HOME', self.HOME)).expanduser()
    project_dir = home_dir / "workshop" / "sijapi"
    resolved['HOME'] = str(home_dir)
    resolved['SIJAPI'] = str(project_dir)
    resolved['DATA'] = str(project_dir / "data")
    return resolved
def resolve_string_placeholders(self, value: str) -> Any:
pattern = r'\{\{\s*([^}]+)\s*\}\}'
matches = re.findall(pattern, value)
for match in matches:
parts = match.split('.')
if len(parts) == 1: # Internal reference
replacement = getattr(self._dir_config, parts[0], str(Path.home() / parts[0].lower()))
replacement = getattr(self, parts[0], str(Path.home() / parts[0].lower()))
elif len(parts) == 2 and parts[0] == 'Dir':
replacement = getattr(self._dir_config, parts[1], str(Path.home() / parts[1].lower()))
replacement = getattr(self, parts[1], str(Path.home() / parts[1].lower()))
elif len(parts) == 2 and parts[0] == 'ENV':
replacement = os.getenv(parts[1], '')
else:
replacement = value # Keep original if not recognized
value = value.replace('{{' + match + '}}', str(replacement))
# Convert to Path if it looks like a file path
if isinstance(value, str) and (value.startswith(('/', '~')) or (':' in value and value[1] == ':')):
return Path(value).expanduser()
return value
@classmethod
def create_dynamic_model(cls, **data):
for key, value in data.items():
@ -121,7 +146,7 @@ class Configuration(BaseModel):
data[key] = cls.create_dynamic_model(**value)
elif isinstance(value, list) and all(isinstance(item, dict) for item in value):
data[key] = [cls.create_dynamic_model(**item) for item in value]
DynamicModel = create_model(
f'Dynamic{cls.__name__}',
__base__=cls,
@ -133,7 +158,11 @@ class Configuration(BaseModel):
extra = "allow"
arbitrary_types_allowed = True
from pydantic import BaseModel, create_model
from typing import Any, Dict, List, Union
from pathlib import Path
import yaml
import re
class APIConfig(BaseModel):
HOST: str
@ -143,8 +172,10 @@ class APIConfig(BaseModel):
PUBLIC: List[str]
TRUSTED_SUBNETS: List[str]
MODULES: Any # This will be replaced with a dynamic model
EXTENSIONS: Any # This will be replaced with a dynamic model
TZ: str
KEYS: List[str]
GARBAGE: Dict[str, Any]
@classmethod
def load(cls, config_path: Union[str, Path], secrets_path: Union[str, Path]):
@ -154,9 +185,9 @@ class APIConfig(BaseModel):
# Load main configuration
with open(config_path, 'r') as file:
config_data = yaml.safe_load(file)
print(f"Loaded main config: {config_data}") # Debug print
# Load secrets
try:
with open(secrets_path, 'r') as file:
@ -168,12 +199,12 @@ class APIConfig(BaseModel):
except yaml.YAMLError as e:
print(f"Error parsing secrets YAML: {e}")
secrets_data = {}
# Resolve internal placeholders
config_data = cls.resolve_placeholders(config_data)
print(f"Resolved config: {config_data}") # Debug print
# Handle KEYS placeholder
if isinstance(config_data.get('KEYS'), list) and len(config_data['KEYS']) == 1:
placeholder = config_data['KEYS'][0]
@ -189,23 +220,29 @@ class APIConfig(BaseModel):
print(f"Secret key '{secret_key}' not found in secrets file")
else:
print(f"Invalid secret placeholder format: {placeholder}")
# Create dynamic ModulesConfig
modules_data = config_data.get('MODULES', {})
modules_fields = {}
for key, value in modules_data.items():
if isinstance(value, str):
modules_fields[key] = (bool, value.lower() == 'on')
elif isinstance(value, bool):
modules_fields[key] = (bool, value)
else:
raise ValueError(f"Invalid value for module {key}: {value}. Must be 'on', 'off', True, or False.")
DynamicModulesConfig = create_model('DynamicModulesConfig', **modules_fields)
config_data['MODULES'] = DynamicModulesConfig(**modules_data)
config_data['MODULES'] = cls._create_dynamic_config(config_data.get('MODULES', {}), 'DynamicModulesConfig')
# Create dynamic ExtensionsConfig
config_data['EXTENSIONS'] = cls._create_dynamic_config(config_data.get('EXTENSIONS', {}), 'DynamicExtensionsConfig')
return cls(**config_data)
@classmethod
def _create_dynamic_config(cls, data: Dict[str, Any], model_name: str):
    """Build a model of boolean toggles from an on/off mapping and instantiate it.

    :param data: Mapping of toggle name to ``'on'``/``'off'`` (any case) or a bool.
    :param model_name: Name given to the dynamically created model class.
    :return: An instance of the generated model, constructed from *data*.
    :raises ValueError: If a value is neither a string nor a bool.
    """
    fields = {}
    for key, value in data.items():
        if isinstance(value, bool):
            default = value
        elif isinstance(value, str):
            # Only the literal 'on' (case-insensitive) yields a True default.
            default = value.lower() == 'on'
        else:
            raise ValueError(f"Invalid value for {key}: {value}. Must be 'on', 'off', True, or False.")
        fields[key] = (bool, default)

    DynamicConfig = create_model(model_name, **fields)
    return DynamicConfig(**data)
@classmethod
def _resolve_path(cls, path: Union[str, Path], default_dir: str) -> Path:
base_path = Path(__file__).parent.parent # This will be two levels up from this file
@ -235,23 +272,26 @@ class APIConfig(BaseModel):
resolved_data[key] = [resolve_value(item) for item in value]
else:
resolved_data[key] = resolve_value(value)
# Resolve BIND separately to ensure HOST and PORT are used
if 'BIND' in resolved_data:
resolved_data['BIND'] = resolved_data['BIND'].replace('{{ HOST }}', str(resolved_data['HOST']))
resolved_data['BIND'] = resolved_data['BIND'].replace('{{ PORT }}', str(resolved_data['PORT']))
return resolved_data
def __getattr__(self, name: str) -> Any:
if name == 'MODULES':
return self.__dict__['MODULES']
if name in ['MODULES', 'EXTENSIONS']:
return self.__dict__[name]
return super().__getattr__(name)
@property
def active_modules(self) -> List[str]:
    """Names of the entries in ``MODULES`` whose value is truthy."""
    enabled = []
    for module, is_active in self.MODULES.__dict__.items():
        if is_active:
            enabled.append(module)
    return enabled
@property
def active_extensions(self) -> List[str]:
    """Names of the entries in ``EXTENSIONS`` whose value is truthy."""
    enabled = []
    for extension, is_active in self.EXTENSIONS.__dict__.items():
        if is_active:
            enabled.append(extension)
    return enabled
class Location(BaseModel):
@ -265,7 +305,7 @@ class Location(BaseModel):
city: Optional[str] = None
state: Optional[str] = None
country: Optional[str] = None
context: Optional[Dict[str, Any]] = None
context: Optional[Dict[str, Any]] = None
class_: Optional[str] = None
type: Optional[str] = None
name: Optional[str] = None
@ -286,6 +326,8 @@ class Location(BaseModel):
}
class Geocoder:
def __init__(self, named_locs: Union[str, Path] = None, cache_file: Union[str, Path] = 'timezone_cache.json'):
self.tf = TimezoneFinder()
@ -319,20 +361,20 @@ class Geocoder:
def find_override_location(self, lat: float, lon: float) -> Optional[str]:
    """Return the name of the nearest override location that contains the point.

    A location qualifies when the haversine distance from (*lat*, *lon*)
    to it is no greater than its own ``radius``. Among qualifying
    locations the one with the smallest distance wins; on a tie the
    earlier entry is kept.

    :param lat: Latitude of the query point.
    :param lon: Longitude of the query point.
    :return: The winning location's ``name``, or ``None`` if none qualifies.
    """
    best_name = None
    best_distance = float('inf')

    for entry in self.override_locations:
        name = entry.get("name")
        radius = entry.get("radius")
        distance = self.haversine(lat, lon, entry.get("latitude"), entry.get("longitude"))

        # Must be inside the location's radius AND strictly closer than
        # the best candidate seen so far.
        if distance <= radius and distance < best_distance:
            best_distance = distance
            best_name = name

    return best_name
async def location(self, lat: float, lon: float):
@ -346,7 +388,7 @@ class Geocoder:
async def elevation(self, latitude: float, longitude: float, unit: str = "m") -> float:
loop = asyncio.get_running_loop()
elevation = await loop.run_in_executor(self.executor, self.srtm_data.get_elevation, latitude, longitude)
if unit == "m":
return elevation
elif unit == "km":
@ -362,12 +404,12 @@ class Geocoder:
return ZoneInfo(timezone_str) if timezone_str else None
async def lookup(self, lat: float, lon: float):
city, state, country = (await self.location(lat, lon))[0]['name'], (await self.location(lat, lon))[0]['admin1'], (await self.location(lat, lon))[0]['cc']
elevation = await self.elevation(lat, lon)
timezone = await self.timezone(lat, lon)
return {
"city": city,
"state": state,
@ -379,12 +421,12 @@ class Geocoder:
async def code(self, locations: Union[Location, Tuple[float, float], List[Union[Location, Tuple[float, float]]]]) -> Union[Location, List[Location]]:
if isinstance(locations, (Location, tuple)):
locations = [locations]
processed_locations = []
for loc in locations:
if isinstance(loc, tuple):
processed_locations.append(Location(
latitude=loc[0],
latitude=loc[0],
longitude=loc[1],
datetime=datetime.now(timezone.utc)
))
@ -396,12 +438,11 @@ class Geocoder:
raise ValueError(f"Unsupported location type: {type(loc)}")
coordinates = [(location.latitude, location.longitude) for location in processed_locations]
geocode_results = await asyncio.gather(*[self.location(lat, lon) for lat, lon in coordinates])
elevations = await asyncio.gather(*[self.elevation(lat, lon) for lat, lon in coordinates])
timezone_results = await asyncio.gather(*[self.timezone(lat, lon) for lat, lon in coordinates])
def create_display_name(override_name, result):
parts = []
if override_name:
@ -446,13 +487,13 @@ class Geocoder:
async def geocode_osm(self, latitude: float, longitude: float, email: str):
url = f"https://nominatim.openstreetmap.org/reverse?format=json&lat={latitude}&lon={longitude}"
headers = {
'User-Agent': f'sijapi/1.0 ({email})', # replace with your app name and email
'User-Agent': f'sijapi/1.0 ({email})',
}
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
response.raise_for_status()
data = await response.json()
address = data.get("address", {})
elevation = await self.elevation(latitude, longitude)
return Location(
@ -465,7 +506,7 @@ class Geocoder:
city=address.get("city"),
state=address.get("state"),
country=address.get("country"),
context={},
context={},
class_=data.get("class"),
type=data.get("type"),
name=data.get("name"),
@ -481,7 +522,6 @@ class Geocoder:
timezone=await self.timezone(latitude, longitude)
)
def round_coords(self, lat: float, lon: float, decimal_places: int = 2) -> Tuple[float, float]:
return (round(lat, decimal_places), round(lon, decimal_places))
@ -501,17 +541,17 @@ class Geocoder:
not self.last_update or
current_time - self.last_update > timedelta(hours=1) or
not self.coords_equal(rounded_location, self.round_coords(*self.last_location) if self.last_location else (None, None))):
new_timezone = await self.timezone(lat, lon)
self.last_timezone = new_timezone
self.last_update = current_time
self.last_location = (lat, lon) # Store the original, non-rounded coordinates
await self.tz_save()
return self.last_timezone
async def tz_save(self):
cache_data = {
'last_timezone': str(self.last_timezone) if self.last_timezone else None,
@ -528,7 +568,7 @@ class Geocoder:
self.last_timezone = ZoneInfo(cache_data['last_timezone']) if cache_data.get('last_timezone') else None
self.last_update = datetime.fromisoformat(cache_data['last_update']) if cache_data.get('last_update') else None
self.last_location = tuple(cache_data['last_location']) if cache_data.get('last_location') else None
except (FileNotFoundError, json.JSONDecodeError):
# If file doesn't exist or is invalid, we'll start fresh
self.last_timezone = None
@ -546,7 +586,7 @@ class Geocoder:
async def tz_at(self, lat: float, lon: float) -> Optional[ZoneInfo]:
"""
Get the timezone at a specific latitude and longitude without affecting the cache.
:param lat: Latitude
:param lon: Longitude
:return: ZoneInfo object representing the timezone
@ -556,7 +596,6 @@ class Geocoder:
def __del__(self):
self.executor.shutdown()
class Database(BaseModel):
host: str = Field(..., description="Database host")
port: int = Field(5432, description="Database port")
@ -596,7 +635,6 @@ class Database(BaseModel):
def to_dict(self):
return self.dict(exclude_none=True)
class IMAPConfig(BaseModel):
username: str
password: str
@ -621,7 +659,7 @@ class AutoResponder(BaseModel):
image_prompt: Optional[str] = None
image_scene: Optional[str] = None
smtp: SMTPConfig
class EmailAccount(BaseModel):
name: str
refresh: int
@ -643,3 +681,12 @@ class IncomingEmail(BaseModel):
subject: str
body: str
attachments: List[dict] = []
class WidgetUpdate(BaseModel):
    # Payload model for a widget update. Every field is an optional string
    # that defaults to None, so any subset may be supplied.
    # NOTE(review): the field names suggest display properties of a status
    # widget (label text, progress, icon, colour, link, shortcut, graph) —
    # confirm the exact meaning and accepted formats against the consumer.
    text: Optional[str] = None
    progress: Optional[str] = None
    icon: Optional[str] = None
    color: Optional[str] = None
    url: Optional[str] = None
    shortcut: Optional[str] = None
    graph: Optional[str] = None

View file

@ -1 +1 @@
{"last_timezone": "America/Los_Angeles", "last_update": "2024-06-29T09:36:32.143487", "last_location": [44.04645364336354, -123.08688060439617]}
{"last_timezone": "America/Los_Angeles", "last_update": "2024-07-22T12:00:14.193328", "last_location": [42.80982885281664, -123.0494316777397]}

View file

@ -0,0 +1,117 @@
import requests
import json
import time
import os
import subprocess
def get_feature_count(url, timeout=30):
    """Return the number of features an ArcGIS layer query endpoint reports.

    Sends a count-only query (``where=1=1``, ``returnCountOnly=true``,
    ``f=json``) to *url* and reads the ``count`` field of the JSON reply.

    Args:
        url: The layer's ``/query`` endpoint.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection
            would block the script indefinitely.

    Returns:
        The ``count`` value from the response, or 0 when the key is absent.

    Raises:
        requests.exceptions.RequestException: On connection failure,
            timeout, or a non-success HTTP status.
    """
    params = {
        'where': '1=1',
        'returnCountOnly': 'true',
        'f': 'json',
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json().get('count', 0)
def fetch_features(url, offset, num, timeout=60):
    """Fetch one page of features from an ArcGIS layer query endpoint.

    Requests all fields (``outFields=*``) of every feature (``where=1=1``)
    with ``geometryPrecision=6`` and ``outSR=4326``, paged with
    ``resultOffset`` / ``resultRecordCount``.

    Args:
        url: The layer's ``/query`` endpoint.
        offset: Index of the first record to return.
        num: Maximum number of records to return in this page.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection
            would block the script indefinitely.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.exceptions.RequestException: On connection failure,
            timeout, or a non-success HTTP status.
    """
    params = {
        'where': '1=1',
        'outFields': '*',
        'geometryPrecision': 6,
        'outSR': 4326,
        'f': 'json',
        'resultOffset': offset,
        'resultRecordCount': num,
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()
def download_layer(layer_num, layer_name,
                   output_dir='/Users/sij/workshop/sijapi/sijapi/data'):
    """Download every feature of one BLM PLSS map layer and save it to disk.

    Features are fetched in pages of 1000 with a one-second pause between
    requests, wrapped in a ``FeatureCollection`` and written to
    ``PLSS_<layer_name>.geojson`` inside *output_dir*.

    Args:
        layer_num: Layer index within the BLM CadNSDI MapServer.
        layer_name: Human-readable name, used in progress messages and
            in the output file name.
        output_dir: Directory the file is written to. Defaults to the
            location the script originally hard-coded.

    Returns:
        The path of the written file, as a string.

    Raises:
        requests.exceptions.RequestException: If a request fails.
        OSError: If the output file cannot be written.
    """
    url = f"https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/{layer_num}/query"

    total_count = get_feature_count(url)
    print(f"Total {layer_name} features: {total_count}")

    batch_size = 1000
    offset = 0
    all_features = []

    while offset < total_count:
        print(f"Fetching {layer_name} features {offset} to {offset + batch_size}...")
        data = fetch_features(url, offset, batch_size)

        new_features = data.get('features', [])
        if not new_features:
            # An empty page ends the download even if the reported total
            # has not been reached; otherwise the loop would never end.
            break

        all_features.extend(new_features)
        # Advance by the number actually returned rather than by batch_size.
        offset += len(new_features)

        print(f"Progress: {len(all_features)}/{total_count} features")
        time.sleep(1)  # Be nice to the server

    print(f"Total {layer_name} features fetched: {len(all_features)}")

    # Convert to GeoJSON. `.get` keeps a record that lacks attributes or
    # geometry from aborting the whole download with a KeyError; a null
    # geometry is valid in a GeoJSON Feature.
    # NOTE(review): the pages are requested with f=json, so the geometry
    # objects are presumably in Esri JSON form (rings/paths) rather than
    # GeoJSON coordinates — confirm the file imports with real geometry.
    geojson_features = [
        {
            "type": "Feature",
            "properties": feature.get('attributes', {}),
            "geometry": feature.get('geometry'),
        }
        for feature in all_features
    ]
    full_geojson = {
        "type": "FeatureCollection",
        "features": geojson_features,
    }

    # Save to file
    file_path = os.path.join(output_dir, f'PLSS_{layer_name}.geojson')
    with open(file_path, 'w') as f:
        json.dump(full_geojson, f)

    print(f"GeoJSON file saved as '{file_path}'")
    return file_path
def import_to_postgis(file_path, table_name):
    """Load a GeoJSON file into a PostGIS table by running ``ogr2ogr``.

    The database name and user come from the standard ``PGDATABASE`` and
    ``PGUSER`` environment variables, each falling back to ``'sij'``.

    No password is placed in the connection string: a secret committed to
    source control is exposed to everyone with repository access, and one
    passed as a command-line argument is visible in the process list.
    Supply it through ``PGPASSWORD`` or a ``~/.pgpass`` entry, which the
    PostgreSQL client library consults when the connection string carries
    no password.

    Args:
        file_path: Path of the GeoJSON file to import.
        table_name: Target table name (passed to ``-nln``); an existing
            table is replaced because ``-overwrite`` is given.

    Raises:
        subprocess.CalledProcessError: If ``ogr2ogr`` exits with a
            non-zero status.
        FileNotFoundError: If the ``ogr2ogr`` executable is not on PATH.
    """
    db_name = os.getenv('PGDATABASE', 'sij')
    db_user = os.getenv('PGUSER', 'sij')

    ogr2ogr_command = [
        'ogr2ogr',
        '-f', 'PostgreSQL',
        f'PG:dbname={db_name} user={db_user}',
        file_path,
        '-nln', table_name,
        '-overwrite',
    ]

    # Argument list (no shell) so paths and names are never shell-parsed.
    subprocess.run(ogr2ogr_command, check=True)
    print(f"Data successfully imported into PostGIS table: {table_name}")
def main():
    """Download the PLSS township and section layers and import each into PostGIS.

    Layers are processed in order (Townships, then Sections); the first
    failure stops the run and is reported on standard output.
    """
    # (MapServer layer number, layer name, destination table)
    layers = (
        (1, "Townships", "public.plss_townships"),
        (2, "Sections", "public.plss_sections"),
    )
    try:
        for layer_num, layer_name, table_name in layers:
            geojson_path = download_layer(layer_num, layer_name)
            import_to_postgis(geojson_path, table_name)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
    except subprocess.CalledProcessError as e:
        print(f"Error importing data into PostGIS: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
if __name__ == "__main__":
main()

View file

@ -0,0 +1,73 @@
import requests
import PyPDF2
import io
import re
def scrape_data_from_pdf(url, timeout=60):
    """Download a PDF and return the text of all its pages.

    Args:
        url: Address of the PDF document.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection
            would block the script indefinitely.

    Returns:
        The extracted text of every page, each page followed by a newline.

    Raises:
        requests.exceptions.RequestException: On connection failure,
            timeout, or a non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on an HTTP error instead of handing an error page to the
    # PDF parser, where the failure would be far harder to interpret.
    response.raise_for_status()

    pdf_reader = PyPDF2.PdfReader(io.BytesIO(response.content))

    # `or ""` keeps the join safe should extract_text() yield nothing for
    # a page; join avoids rebuilding the string once per page.
    return "".join(
        (page.extract_text() or "") + "\n" for page in pdf_reader.pages
    )
def parse_data(raw_data):
    """Split report text into one entry per harvest document.

    A line that starts with a harvest-document number opens a new entry;
    every following line (stripped) is collected into that entry's
    ``'Raw Data'`` list until the next document number. Lines before the
    first document number are discarded.

    The number pattern accepts an optional run of capital letters after
    the second numeric group, so type-infixed numbers such as
    ``2-24NTMP-00004-SHA`` start their own entry. The purely numeric
    pattern did not match them, which folded such a plan into the raw
    data of the plan before it.

    Args:
        raw_data: The full extracted text, lines separated by ``\\n``.

    Returns:
        A list of dicts with keys ``'Harvest Document'`` (the matching
        line) and ``'Raw Data'`` (list of the lines that followed it).
    """
    document_id = re.compile(r'\d+-\d+[A-Z]*-\d+-\w+')

    data = []
    current_entry = None

    for line in raw_data.split('\n'):
        line = line.strip()
        if document_id.match(line):
            if current_entry:
                data.append(current_entry)
            current_entry = {'Harvest Document': line, 'Raw Data': []}
        elif current_entry:
            current_entry['Raw Data'].append(line)

    # Flush the entry still being collected when the input ends.
    if current_entry:
        data.append(current_entry)

    return data
def filter_data(data):
    """Keep the entries whose raw text mentions one of the target land owners.

    An entry's ``'Raw Data'`` lines are joined with spaces and compared
    case-insensitively; it is kept when the joined text contains any of
    the owner keywords as a substring.

    Args:
        data: Entries as produced by ``parse_data``.

    Returns:
        A new list with the matching entries, in their original order.
    """
    owners = ["Sierra Pacific", "SPI", "Land & Timber"]

    matching = []
    for entry in data:
        haystack = ' '.join(entry['Raw Data']).lower()
        if any(owner.lower() in haystack for owner in owners):
            matching.append(entry)
    return matching
def extract_location(raw_data):
    """Join the lines that carry a meridian marker into one location string.

    Args:
        raw_data: The lines collected for one harvest document.

    Returns:
        The lines containing ``MDBM:`` or ``HBM:``, in order, joined with
        single spaces; an empty string when no line qualifies.
    """
    return ' '.join(
        line for line in raw_data if 'MDBM:' in line or 'HBM:' in line
    )
def extract_plss_coordinates(text):
    """Find every ``<meridian>: T<n><dir> R<n><dir> S<n>`` reference in *text*.

    Args:
        text: A location string.

    Returns:
        A list of 6-tuples of strings: (meridian, township number,
        township direction, range number, range direction, section),
        in order of appearance.
    """
    pattern = r'(\w+): T(\d+)([NSEW]) R(\d+)([NSEW]) S(\d+)'
    return [match.groups() for match in re.finditer(pattern, text)]
# Script body: download the timber-harvest report, parse it, keep the
# entries for the target land owners and print each one's PLSS references.
url = "https://caltreesplans.resources.ca.gov/Caltrees/Report/ShowReport.aspx?module=TH_Document&reportID=492&reportType=LINK_REPORT_LIST"

raw_data = scrape_data_from_pdf(url)
parsed_data = parse_data(raw_data)
print(f"Total timber plans parsed: {len(parsed_data)}")

filtered_data = filter_data(parsed_data)
print(f"Found {len(filtered_data)} matching entries.")

for plan in filtered_data:
    print("\nHarvest Document:", plan['Harvest Document'])

    location = extract_location(plan['Raw Data'])
    print("Location:", location)

    plss_coordinates = extract_plss_coordinates(location)
    print("PLSS Coordinates:")
    # Each match is a 6-tuple; unpack it directly in the loop header.
    for meridian, township, township_dir, range_, range_dir, section in plss_coordinates:
        print(f" {meridian}: T{township}{township_dir} R{range_}{range_dir} S{section}")

    print("-" * 50)

23
sijapi/helpers/article.py Executable file
View file

@ -0,0 +1,23 @@
#!/Users/sij/miniforge3/envs/sijapi/bin/python
import sys
import asyncio
from fastapi import BackgroundTasks
from sijapi.routers.news import process_and_save_article
async def main():
    """Process and save the article whose URL is given on the command line.

    Expects exactly one argument. Prints the result returned by
    ``process_and_save_article``; on a usage error or any exception it
    prints a message and exits with status 1.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print("Usage: python script.py <article_url>")
        sys.exit(1)

    article_url = arguments[0]
    background_tasks = BackgroundTasks()

    try:
        outcome = await process_and_save_article(background_tasks, article_url)
        print(outcome)
    except Exception as e:
        print(f"Error processing article: {str(e)}")
        sys.exit(1)
if __name__ == "__main__":
asyncio.run(main())

57
sijapi/helpers/cli.py Normal file
View file

@ -0,0 +1,57 @@
# cli.py
import click
import asyncio
from datetime import datetime as dt_datetime, timedelta
# Import your async functions and dependencies
from sijapi import build_daily_note_range_endpoint, gis # broken!
def async_command(f):
    """Adapt coroutine function *f* into a plain synchronous callable.

    The returned wrapper runs ``f(*args, **kwargs)`` to completion with
    ``asyncio.run`` and returns its result, so it can serve as a click
    callback.

    This decorator deliberately does NOT apply ``click.command()`` itself:
    it is used underneath ``@cli.command()`` / ``@click.argument(...)``, and
    click raises ``TypeError`` when asked to convert an existing Command into
    a command a second time. Registration is left to the outer decorator.

    Args:
        f: Coroutine function to wrap.

    Returns:
        A synchronous function with the same name, docstring and call
        signature as *f* (so click derives the right command name and help).
    """
    import functools  # local import keeps this helper self-contained

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        # asyncio.run creates a fresh event loop for each invocation.
        return asyncio.run(f(*args, **kwargs))

    return wrapper
@click.group()
def cli():
    """CLI for your application."""
    # The group callback has no work of its own; subcommands are attached
    # with @cli.command().
@cli.command()
@click.argument('dt_start')
@click.argument('dt_end')
@async_command
async def bulk_note_range(dt_start: str, dt_end: str):
    """
    Build daily notes for a date range.
    DT_START and DT_END should be in YYYY-MM-DD format.
    """
    # NOTE(review): build_daily_note is not one of the names imported at the
    # top of this file (only build_daily_note_range_endpoint and gis are);
    # confirm the intended import before relying on this command.
    date_format = "%Y-%m-%d"
    try:
        start_date = dt_datetime.strptime(dt_start, date_format)
        end_date = dt_datetime.strptime(dt_end, date_format)
    except ValueError:
        click.echo("Error: Dates must be in YYYY-MM-DD format.")
        return

    if end_date < start_date:
        click.echo("Error: Start date must be before or equal to end date.")
        return

    # Both bounds are midnight datetimes, so the inclusive range is a whole
    # number of days.
    day_count = (end_date - start_date).days + 1
    results = []
    for offset in range(day_count):
        current_date = start_date + timedelta(days=offset)
        formatted_date = await gis.dt(current_date)
        note = await build_daily_note(formatted_date)
        results.append(note)

    click.echo("Generated notes for the following dates:")
    for url in results:
        click.echo(url)
if __name__ == '__main__':
cli()

View file

@ -43,7 +43,7 @@ async def process_all_emails(account: EmailAccount, summarized_log: Path, autore
L.INFO(f"Processed {processed_count} non-unread emails for account {account.name}")
except Exception as e:
L.ERR(f"An error occurred while processing emails for account {account.name}: {e}")
L.logger.error(f"An error occurred while processing emails for account {account.name}: {e}")
async def main():
email_accounts = email.load_email_accounts(EMAIL_CONFIG)

View file

@ -18,6 +18,11 @@ from sijapi import L, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL,
asr = APIRouter()
logger = L.get_module_logger("asr")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
class TranscribeParams(BaseModel):
model: str = Field(default="small")
@ -81,7 +86,7 @@ async def transcribe_endpoint(
return JSONResponse(content={"status": "timeout", "message": "Transcription is taking longer than expected. Please check back later."}, status_code=202)
async def transcribe_audio(file_path, params: TranscribeParams):
logger.debug(f"Transcribing audio file from {file_path}...")
debug(f"Transcribing audio file from {file_path}...")
file_path = await convert_to_wav(file_path)
model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'
@ -119,11 +124,11 @@ async def transcribe_audio(file_path, params: TranscribeParams):
command.extend(['--dtw', params.dtw])
command.extend(['-f', file_path])
logger.debug(f"Command: {command}")
debug(f"Command: {command}")
# Create a unique ID for this transcription job
job_id = str(uuid.uuid4())
logger.debug(f"Created job ID: {job_id}")
debug(f"Created job ID: {job_id}")
# Store the job status
transcription_results[job_id] = {"status": "processing", "result": None}
@ -135,20 +140,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):
poll_interval = 1 # 1 second
start_time = asyncio.get_event_loop().time()
logger.debug(f"Starting to poll for job {job_id}")
debug(f"Starting to poll for job {job_id}")
try:
while asyncio.get_event_loop().time() - start_time < max_wait_time:
job_status = transcription_results.get(job_id, {})
logger.debug(f"Current status for job {job_id}: {job_status['status']}")
debug(f"Current status for job {job_id}: {job_status['status']}")
if job_status["status"] == "completed":
logger.info(f"Transcription completed for job {job_id}")
info(f"Transcription completed for job {job_id}")
return job_status["result"]
elif job_status["status"] == "failed":
logger.error(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}")
err(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}")
raise Exception(f"Transcription failed: {job_status.get('error', 'Unknown error')}")
await asyncio.sleep(poll_interval)
logger.error(f"Transcription timed out for job {job_id}")
err(f"Transcription timed out for job {job_id}")
raise TimeoutError("Transcription timed out")
finally:
# Ensure the task is cancelled if we exit the loop
@ -159,20 +164,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):
async def process_transcription(command, file_path, job_id):
try:
logger.debug(f"Starting transcription process for job {job_id}")
debug(f"Starting transcription process for job {job_id}")
result = await run_transcription(command, file_path)
transcription_results[job_id] = {"status": "completed", "result": result}
logger.debug(f"Transcription completed for job {job_id}")
debug(f"Transcription completed for job {job_id}")
except Exception as e:
logger.error(f"Transcription failed for job {job_id}: {str(e)}")
err(f"Transcription failed for job {job_id}: {str(e)}")
transcription_results[job_id] = {"status": "failed", "error": str(e)}
finally:
# Clean up the temporary file
os.remove(file_path)
logger.debug(f"Cleaned up temporary file for job {job_id}")
debug(f"Cleaned up temporary file for job {job_id}")
async def run_transcription(command, file_path):
logger.debug(f"Running transcription command: {' '.join(command)}")
debug(f"Running transcription command: {' '.join(command)}")
proc = await asyncio.create_subprocess_exec(
*command,
stdout=asyncio.subprocess.PIPE,
@ -181,9 +186,9 @@ async def run_transcription(command, file_path):
stdout, stderr = await proc.communicate()
if proc.returncode != 0:
error_message = f"Error running command: {stderr.decode()}"
logger.error(error_message)
err(error_message)
raise Exception(error_message)
logger.debug("Transcription command completed successfully")
debug("Transcription command completed successfully")
return stdout.decode().strip()
async def convert_to_wav(file_path: str):

View file

@ -14,42 +14,46 @@ from dateutil.parser import isoparse as parse_iso
import threading
from typing import Dict, List, Any
from datetime import datetime, timedelta
from Foundation import NSDate, NSRunLoop
import EventKit as EK
from sijapi import L, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
from sijapi.routers import loc
from sijapi.routers import gis
cal = APIRouter()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token")
timeout = httpx.Timeout(12)
logger = L.get_module_logger("cal")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
if MS365_TOGGLE is True:
logger.critical(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
crit(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
@cal.get("/o365/login")
async def login():
logger.debug(f"Received request to /o365/login")
logger.debug(f"SCOPE: {MS365_SCOPE}")
debug(f"Received request to /o365/login")
debug(f"SCOPE: {MS365_SCOPE}")
if not MS365_SCOPE:
logger.error("No scopes defined for authorization.")
err("No scopes defined for authorization.")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="No scopes defined for authorization."
)
authorization_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize?client_id={MS365_CLIENT_ID}&response_type=code&redirect_uri={MS365_REDIRECT_PATH}&scope={'+'.join(MS365_SCOPE)}"
logger.info(f"Redirecting to authorization URL: {authorization_url}")
info(f"Redirecting to authorization URL: {authorization_url}")
return RedirectResponse(authorization_url)
@cal.get("/o365/oauth_redirect")
async def oauth_redirect(code: str = None, error: str = None):
logger.debug(f"Received request to /o365/oauth_redirect")
debug(f"Received request to /o365/oauth_redirect")
if error:
logger.error(f"OAuth2 Error: {error}")
err(f"OAuth2 Error: {error}")
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error"
)
logger.info(f"Requesting token with authorization code: {code}")
info(f"Requesting token with authorization code: {code}")
token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token"
data = {
"client_id": MS365_CLIENT_ID,
@ -60,15 +64,15 @@ if MS365_TOGGLE is True:
}
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.post(token_url, data=data)
logger.debug(f"Token endpoint response status code: {response.status_code}")
logger.info(f"Token endpoint response text: {response.text}")
debug(f"Token endpoint response status code: {response.status_code}")
info(f"Token endpoint response text: {response.text}")
result = response.json()
if 'access_token' in result:
await save_token(result)
logger.info("Access token obtained successfully")
info("Access token obtained successfully")
return {"message": "Access token stored successfully"}
else:
logger.critical(f"Failed to obtain access token. Response: {result}")
crit(f"Failed to obtain access token. Response: {result}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to obtain access token"
@ -76,7 +80,7 @@ if MS365_TOGGLE is True:
@cal.get("/o365/me")
async def read_items():
logger.debug(f"Received request to /o365/me")
debug(f"Received request to /o365/me")
token = await load_token()
if not token:
raise HTTPException(
@ -89,10 +93,10 @@ if MS365_TOGGLE is True:
response = await client.get(graph_url, headers=headers)
if response.status_code == 200:
user = response.json()
logger.info(f"User retrieved: {user}")
info(f"User retrieved: {user}")
return user
else:
logger.error("Invalid or expired token")
err("Invalid or expired token")
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid or expired token",
@ -100,14 +104,14 @@ if MS365_TOGGLE is True:
)
async def save_token(token):
logger.debug(f"Saving token: {token}")
debug(f"Saving token: {token}")
try:
token["expires_at"] = int(time.time()) + token["expires_in"]
with open(MS365_TOKEN_PATH, "w") as file:
json.dump(token, file)
logger.debug(f"Saved token to {MS365_TOKEN_PATH}")
debug(f"Saved token to {MS365_TOKEN_PATH}")
except Exception as e:
logger.error(f"Failed to save token: {e}")
err(f"Failed to save token: {e}")
async def load_token():
if os.path.exists(MS365_TOKEN_PATH):
@ -115,21 +119,21 @@ if MS365_TOGGLE is True:
with open(MS365_TOKEN_PATH, "r") as file:
token = json.load(file)
except FileNotFoundError:
logger.error("Token file not found.")
err("Token file not found.")
return None
except json.JSONDecodeError:
logger.error("Failed to decode token JSON")
err("Failed to decode token JSON")
return None
if token:
token["expires_at"] = int(time.time()) + token["expires_in"]
logger.debug(f"Loaded token: {token}") # Add this line to log the loaded token
debug(f"Loaded token: {token}") # Add this line to log the loaded token
return token
else:
logger.debug("No token found.")
debug("No token found.")
return None
else:
logger.error(f"No file found at {MS365_TOKEN_PATH}")
err(f"No file found at {MS365_TOKEN_PATH}")
return None
@ -159,64 +163,137 @@ if MS365_TOGGLE is True:
response = await client.post(token_url, data=data)
result = response.json()
if "access_token" in result:
logger.info("Access token refreshed successfully")
info("Access token refreshed successfully")
return result
else:
logger.error("Failed to refresh access token")
err("Failed to refresh access token")
return None
async def refresh_token():
token = await load_token()
if not token:
logger.error("No token found in storage")
err("No token found in storage")
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="No token found",
)
if 'refresh_token' not in token:
logger.error("Refresh token not found in the loaded token")
err("Refresh token not found in the loaded token")
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Refresh token not found",
)
refresh_token = token['refresh_token']
logger.debug("Found refresh token, attempting to refresh access token")
debug("Found refresh token, attempting to refresh access token")
new_token = await get_new_token_with_refresh_token(refresh_token)
if new_token:
await save_token(new_token)
logger.info("Token refreshed and saved successfully")
info("Token refreshed and saved successfully")
else:
logger.error("Failed to refresh token")
err("Failed to refresh token")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to refresh token",
)
if ICAL_TOGGLE is True:
from Foundation import NSDate, NSRunLoop
import EventKit as EK
def get_calendar_ids() -> Dict[str, str]:
event_store = EK.EKEventStore.alloc().init()
all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent
# Helper to convert datetime to NSDate
def datetime_to_nsdate(dt: datetime) -> NSDate:
return NSDate.dateWithTimeIntervalSince1970_(dt.timestamp())
calendar_identifiers = {
calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars
}
logger.debug(f"{calendar_identifiers}")
return calendar_identifiers
def get_calendar_ids() -> Dict[str, str]:
event_store = EK.EKEventStore.alloc().init()
all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent
# Helper to convert datetime to NSDate
def datetime_to_nsdate(dt: datetime) -> NSDate:
return NSDate.dateWithTimeIntervalSince1970_(dt.timestamp())
calendar_identifiers = {
calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars
}
debug(f"{calendar_identifiers}")
return calendar_identifiers
def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]:
    """Query the macOS EventKit store for events between two datetimes.

    Args:
        start_date: Start of the query window.
        end_date: End of the query window.
        calendar_ids: Optional calendar identifiers to restrict the query to.
            When None or empty, every event calendar is searched.

    Returns:
        One dict per matching event with the keys ``subject``, ``id``,
        ``start``, ``end``, ``bodyPreview``, ``attendees``, ``location``,
        ``onlineMeetingUrl``, ``showAs`` and ``isAllDay``. An empty list is
        returned when access to calendar data is not granted.
    """
    entity_type_event = 0  # 0 corresponds to EKEntityTypeEvent
    store = EK.EKEventStore.alloc().init()

    def request_access() -> bool:
        """Request event access and wait up to 10 seconds for the callback."""
        responses = []
        condition = threading.Condition()

        def completion_handler(granted, error):
            if error is not None:
                err(f"Error: {error}")
            responses.append(granted)
            # Wake the waiter below once the answer has been recorded.
            with condition:
                condition.notify()

        with condition:
            store.requestAccessToEntityType_completion_(entity_type_event, completion_handler)
            condition.wait(timeout=10)
            if not responses:
                err("Request access timed out or failed")
                return False
            return responses[0]

    if not request_access():
        err("Access to calendar data was not granted")
        return []

    ns_start_date = datetime_to_nsdate(start_date)
    ns_end_date = datetime_to_nsdate(end_date)

    # Start from every event calendar, then narrow to the requested ids if any.
    calendars = store.calendarsForEntityType_(entity_type_event)
    if calendar_ids:
        calendars = [entry for entry in calendars if entry.calendarIdentifier() in calendar_ids]

    predicate = store.predicateForEventsWithStartDate_endDate_calendars_(ns_start_date, ns_end_date, calendars)
    events = store.eventsMatchingPredicate_(predicate)

    def describe(event) -> Dict:
        """Convert one EventKit event into the dict shape returned to callers."""
        invitees = event.attendees()
        if invitees:
            # Only attendees that have an email address are reported.
            attendees = [
                {'name': att.name(), 'email': att.emailAddress()}
                for att in invitees
                if att.emailAddress()
            ]
        else:
            attendees = []

        notes = event.notes()
        location = event.location()
        return {
            "subject": event.title(),
            "id": event.eventIdentifier(),
            "start": event.startDate().descriptionWithLocale_(None),
            "end": event.endDate().descriptionWithLocale_(None),
            "bodyPreview": notes if notes else '',
            "attendees": attendees,
            "location": location if location else '',
            "onlineMeetingUrl": '',  # Defaulting to empty as macOS EventKit does not provide this
            "showAs": 'busy',  # Default to 'busy'
            "isAllDay": event.isAllDay()
        }

    event_list = []
    for event in events:
        event_list.append(describe(event))
    return event_list
@cal.get("/events")
async def get_events_endpoint(start_date: str, end_date: str):
start_dt = await loc.dt(start_date)
end_dt = await loc.dt(end_date)
start_dt = await gis.dt(start_date)
end_dt = await gis.dt(end_date)
datetime.strptime(start_date, "%Y-%m-%d") or datetime.now()
end_dt = datetime.strptime(end_date, "%Y-%m-%d") or datetime.now()
response = await get_events(start_dt, end_dt)
@ -237,80 +314,6 @@ async def get_events(start_dt: datetime, end_dt: datetime) -> List:
return parsed_events
def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]:
event_store = EK.EKEventStore.alloc().init()
# Request access to EventKit
def request_access() -> bool:
access_granted = []
def completion_handler(granted, error):
if error is not None:
logger.error(f"Error: {error}")
access_granted.append(granted)
# Notify the main thread that the completion handler has executed
with access_granted_condition:
access_granted_condition.notify()
access_granted_condition = threading.Condition()
with access_granted_condition:
event_store.requestAccessToEntityType_completion_(0, completion_handler) # 0 corresponds to EKEntityTypeEvent
# Wait for the completion handler to be called
access_granted_condition.wait(timeout=10)
# Verify that the handler was called and access_granted is not empty
if access_granted:
return access_granted[0]
else:
logger.error("Request access timed out or failed")
return False
if not request_access():
logger.error("Access to calendar data was not granted")
return []
ns_start_date = datetime_to_nsdate(start_date)
ns_end_date = datetime_to_nsdate(end_date)
# Retrieve all calendars
all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent
if calendar_ids:
selected_calendars = [cal for cal in all_calendars if cal.calendarIdentifier() in calendar_ids]
else:
selected_calendars = all_calendars
# Filtering events by selected calendars
predicate = event_store.predicateForEventsWithStartDate_endDate_calendars_(ns_start_date, ns_end_date, selected_calendars)
events = event_store.eventsMatchingPredicate_(predicate)
event_list = []
for event in events:
# Check if event.attendees() returns None
if event.attendees():
attendees = [{'name': att.name(), 'email': att.emailAddress()} for att in event.attendees() if att.emailAddress()]
else:
attendees = []
# Format the start and end dates properly
start_date_str = event.startDate().descriptionWithLocale_(None)
end_date_str = event.endDate().descriptionWithLocale_(None)
event_data = {
"subject": event.title(),
"id": event.eventIdentifier(),
"start": start_date_str,
"end": end_date_str,
"bodyPreview": event.notes() if event.notes() else '',
"attendees": attendees,
"location": event.location() if event.location() else '',
"onlineMeetingUrl": '', # Defaulting to empty as macOS EventKit does not provide this
"showAs": 'busy', # Default to 'busy'
"isAllDay": event.isAllDay()
}
event_list.append(event_data)
return event_list
async def get_ms365_events(start_date: datetime, end_date: datetime):
token = await load_token()
if token:
@ -331,7 +334,7 @@ async def get_ms365_events(start_date: datetime, end_date: datetime):
response = await client.get(graph_url, headers=headers)
if response.status_code != 200:
logger.error("Failed to retrieve events from Microsoft 365")
err("Failed to retrieve events from Microsoft 365")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to retrieve events",
@ -342,48 +345,48 @@ async def get_ms365_events(start_date: datetime, end_date: datetime):
async def parse_calendar_for_day(range_start: datetime, range_end: datetime, events: List[Dict[str, Any]]):
range_start = await loc.dt(range_start)
range_end = await loc.dt(range_end)
range_start = await gis.dt(range_start)
range_end = await gis.dt(range_end)
event_list = []
for event in events:
logger.info(f"Event: {event}")
info(f"Event: {event}")
start_str = event.get('start')
end_str = event.get('end')
if isinstance(start_str, dict):
start_str = start_str.get('dateTime')
else:
logger.info(f"Start date string not a dict")
info(f"Start date string not a dict")
if isinstance(end_str, dict):
end_str = end_str.get('dateTime')
else:
logger.info(f"End date string not a dict")
info(f"End date string not a dict")
try:
start_date = await loc.dt(start_str) if start_str else None
start_date = await gis.dt(start_str) if start_str else None
except (ValueError, TypeError) as e:
logger.error(f"Invalid start date format: {start_str}, error: {e}")
err(f"Invalid start date format: {start_str}, error: {e}")
continue
try:
end_date = await loc.dt(end_str) if end_str else None
end_date = await gis.dt(end_str) if end_str else None
except (ValueError, TypeError) as e:
logger.error(f"Invalid end date format: {end_str}, error: {e}")
err(f"Invalid end date format: {end_str}, error: {e}")
continue
logger.debug(f"Comparing {start_date} with range {range_start} to {range_end}")
debug(f"Comparing {start_date} with range {range_start} to {range_end}")
if start_date:
# Ensure start_date is timezone-aware
start_date = await loc.dt(start_date)
start_date = await gis.dt(start_date)
# If end_date is not provided, assume it's the same as start_date
if not end_date:
end_date = start_date
else:
end_date = await loc.dt(end_date)
end_date = await gis.dt(end_date)
# Check if the event overlaps with the given range
if (start_date < range_end) and (end_date > range_start):
@ -405,11 +408,11 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve
"busy": event.get('showAs', '') in ['busy', 'tentative'],
"all_day": event.get('isAllDay', False)
}
logger.info(f"Event_data: {event_data}")
info(f"Event_data: {event_data}")
event_list.append(event_data)
else:
logger.debug(f"Event outside of specified range: {start_date} to {end_date}")
debug(f"Event outside of specified range: {start_date} to {end_date}")
else:
logger.error(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
err(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
return event_list

View file

@ -13,6 +13,11 @@ import os
cf = APIRouter()
logger = L.get_module_logger("cal")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
class DNSRecordRequest(BaseModel):
full_domain: str
@ -70,7 +75,7 @@ async def retry_request(url, headers, max_retries=5, backoff_factor=1):
response.raise_for_status()
return response
except (httpx.HTTPError, httpx.ConnectTimeout) as e:
logger.error(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
err(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
await sleep(backoff_factor * (2 ** retry))
raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request")

View file

@ -9,6 +9,11 @@ from sijapi import L, REBOOT_SCRIPT_PATH, HOST_CONFIG, API_CONFIG
dist = APIRouter()
logger = L.get_module_logger("dist")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
@dist.get("/update-restart-others")
async def update_and_restart_others():
@ -32,10 +37,10 @@ async def update_and_restart_self(safe: bool = True):
stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await process.communicate()
logger.info(f"Update and restart initiated for self. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}")
info(f"Update and restart initiated for self. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}")
return {"message": "Update and restart process initiated for this server."}
except Exception as e:
logger.error(f"Failed to initiate update and restart for self: {str(e)}")
err(f"Failed to initiate update and restart for self: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to initiate update and restart: {str(e)}")
@dist.get("/update-and-restart-all")
@ -56,5 +61,5 @@ async def ensure_redundancy():
redundancy = True
break
except aiohttp.ClientError:
logger.warning(f"Failed to check health of server {server.id}")
warn(f"Failed to check health of server {server.id}")
return redundancy

View file

@ -21,25 +21,24 @@ import yaml
from typing import List, Dict, Optional, Set
from datetime import datetime as dt_datetime
from sijapi import L, PODCAST_DIR, DEFAULT_VOICE, EMAIL_CONFIG, EMAIL_LOGS
from sijapi.routers import img, loc, tts, llm
from sijapi.routers import gis, img, tts, llm
from sijapi.utilities import clean_text, assemble_journal_path, extract_text, prefix_lines
from sijapi.classes import EmailAccount, IMAPConfig, SMTPConfig, IncomingEmail, EmailContact, AutoResponder
email = APIRouter()
logger = L.get_module_logger("email")
print(f"Email logger level: {logger.level}") # Debug print
logger.debug("This is a debug message from email module")
logger.info("This is an info message from email module")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
def load_email_accounts(yaml_path: str) -> List[EmailAccount]:
with open(yaml_path, 'r') as file:
config = yaml.safe_load(file)
return [EmailAccount(**account) for account in config['accounts']]
def get_imap_connection(account: EmailAccount):
return Imbox(account.imap.host,
username=account.imap.username,
@ -59,34 +58,34 @@ def get_smtp_connection(autoresponder: AutoResponder):
if smtp_config.encryption == 'SSL':
try:
logger.debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}")
debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}")
return SMTP_SSL(smtp_config.host, smtp_config.port, context=context)
except ssl.SSLError as e:
logger.error(f"SSL connection failed: {str(e)}")
err(f"SSL connection failed: {str(e)}")
# If SSL fails, try TLS
try:
logger.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
smtp = SMTP(smtp_config.host, smtp_config.port)
smtp.starttls(context=context)
return smtp
except Exception as e:
logger.error(f"STARTTLS connection failed: {str(e)}")
err(f"STARTTLS connection failed: {str(e)}")
raise
elif smtp_config.encryption == 'STARTTLS':
try:
logger.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
smtp = SMTP(smtp_config.host, smtp_config.port)
smtp.starttls(context=context)
return smtp
except Exception as e:
logger.error(f"STARTTLS connection failed: {str(e)}")
err(f"STARTTLS connection failed: {str(e)}")
raise
else:
try:
logger.debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}")
debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}")
return SMTP(smtp_config.host, smtp_config.port)
except Exception as e:
logger.error(f"Unencrypted connection failed: {str(e)}")
err(f"Unencrypted connection failed: {str(e)}")
raise
async def send_response(to_email: str, subject: str, body: str, profile: AutoResponder, image_attachment: Path = None) -> bool:
@ -103,20 +102,20 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
img = MIMEImage(img_file.read(), name=os.path.basename(image_attachment))
message.attach(img)
logger.debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...")
debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...")
server = get_smtp_connection(profile)
logger.debug(f"SMTP connection established: {type(server)}")
debug(f"SMTP connection established: {type(server)}")
server.login(profile.smtp.username, profile.smtp.password)
server.send_message(message)
logger.info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!")
info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!")
return True
except Exception as e:
logger.error(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}")
logger.error(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}")
logger.error(traceback.format_exc())
err(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}")
err(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}")
err(traceback.format_exc())
return False
finally:
@ -124,7 +123,7 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
try:
server.quit()
except Exception as e:
logger.error(f"Error closing SMTP connection: {str(e)}")
err(f"Error closing SMTP connection: {str(e)}")
@ -161,15 +160,15 @@ async def process_account_archival(account: EmailAccount):
while True:
try:
processed_uids = await load_processed_uids(summarized_log)
logger.debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.")
debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.")
with get_imap_connection(account) as inbox:
unread_messages = inbox.messages(unread=True)
logger.debug(f"There are {len(unread_messages)} unread messages.")
debug(f"There are {len(unread_messages)} unread messages.")
for uid, message in unread_messages:
uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
if uid_str not in processed_uids:
recipients = [EmailContact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to]
localized_datetime = await loc.dt(message.date)
localized_datetime = await gis.dt(message.date)
this_email = IncomingEmail(
sender=message.sent_from[0]['email'],
datetime_received=localized_datetime,
@ -184,13 +183,13 @@ async def process_account_archival(account: EmailAccount):
save_success = await save_email(md_path, md_content)
if save_success:
await save_processed_uid(summarized_log, account.name, uid_str)
logger.info(f"Summarized email: {uid_str}")
info(f"Summarized email: {uid_str}")
else:
logger.warning(f"Failed to summarize {this_email.subject}")
warn(f"Failed to summarize {this_email.subject}")
else:
logger.debug(f"Skipping {uid_str} because it was already processed.")
debug(f"Skipping {uid_str} because it was already processed.")
except Exception as e:
logger.error(f"An error occurred during summarization for account {account.name}: {e}")
err(f"An error occurred during summarization for account {account.name}: {e}")
await asyncio.sleep(account.refresh)
@ -236,7 +235,7 @@ tags:
return markdown_content
except Exception as e:
logger.error(f"Exception: {e}")
err(f"Exception: {e}")
return False
async def save_email(md_path, md_content):
@ -244,14 +243,14 @@ async def save_email(md_path, md_content):
with open(md_path, 'w', encoding='utf-8') as md_file:
md_file.write(md_content)
logger.debug(f"Saved markdown to {md_path}")
debug(f"Saved markdown to {md_path}")
return True
except Exception as e:
logger.error(f"Failed to save email: {e}")
err(f"Failed to save email: {e}")
return False
def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount) -> List[AutoResponder]:
logger.debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"")
debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"")
def matches_list(item: str, this_email: IncomingEmail) -> bool:
if '@' in item:
return item in this_email.sender
@ -262,12 +261,12 @@ def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount
whitelist_match = not profile.whitelist or any(matches_list(item, this_email) for item in profile.whitelist)
blacklist_match = any(matches_list(item, this_email) for item in profile.blacklist)
if whitelist_match and not blacklist_match:
logger.debug(f"We have a match for {whitelist_match} and no blacklist matches.")
debug(f"We have a match for {whitelist_match} and no blacklist matches.")
matching_profiles.append(profile)
elif whitelist_match and blacklist_match:
logger.debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}")
debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}")
else:
logger.debug(f"No whitelist or blacklist matches.")
debug(f"No whitelist or blacklist matches.")
return matching_profiles
@ -278,30 +277,30 @@ async def process_account_autoresponding(account: EmailAccount):
while True:
try:
processed_uids = await load_processed_uids(EMAIL_AUTORESPONSE_LOG)
logger.debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.")
debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.")
with get_imap_connection(account) as inbox:
unread_messages = inbox.messages(unread=True)
logger.debug(f"There are {len(unread_messages)} unread messages.")
debug(f"There are {len(unread_messages)} unread messages.")
for uid, message in unread_messages:
uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
if uid_str not in processed_uids:
await autorespond_single_email(message, uid_str, account, EMAIL_AUTORESPONSE_LOG)
else:
logger.debug(f"Skipping {uid_str} because it was already processed.")
debug(f"Skipping {uid_str} because it was already processed.")
except Exception as e:
logger.error(f"An error occurred during auto-responding for account {account.name}: {e}")
err(f"An error occurred during auto-responding for account {account.name}: {e}")
await asyncio.sleep(account.refresh)
async def autorespond_single_email(message, uid_str: str, account: EmailAccount, log_file: Path):
this_email = await create_incoming_email(message)
logger.debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...")
debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...")
matching_profiles = get_matching_autoresponders(this_email, account)
logger.debug(f"Matching profiles: {matching_profiles}")
debug(f"Matching profiles: {matching_profiles}")
for profile in matching_profiles:
response_body = await generate_response(this_email, profile, account)
@ -311,18 +310,18 @@ async def autorespond_single_email(message, uid_str: str, account: EmailAccount,
jpg_path = await img.workflow(profile.image_prompt, earlyout=False, downscale_to_fit=True) if profile.image_prompt else None
success = await send_response(this_email.sender, subject, response_body, profile, jpg_path)
if success:
logger.warning(f"Auto-responded to email: {this_email.subject}")
warn(f"Auto-responded to email: {this_email.subject}")
await save_processed_uid(log_file, account.name, uid_str)
else:
logger.warning(f"Failed to send auto-response to {this_email.subject}")
warn(f"Failed to send auto-response to {this_email.subject}")
else:
logger.warning(f"Unable to generate auto-response for {this_email.subject}")
warn(f"Unable to generate auto-response for {this_email.subject}")
async def generate_response(this_email: IncomingEmail, profile: AutoResponder, account: EmailAccount) -> Optional[str]:
logger.info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}")
info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}")
now = await loc.dt(dt_datetime.now())
then = await loc.dt(this_email.datetime_received)
now = await gis.dt(dt_datetime.now())
then = await gis.dt(this_email.datetime_received)
age = now - then
usr_prompt = f'''
Generate a personalized auto-response to the following email:
@ -337,7 +336,7 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec
try:
response = await llm.query_ollama(usr_prompt, sys_prompt, profile.ollama_model, 400)
logger.debug(f"query_ollama response: {response}")
debug(f"query_ollama response: {response}")
if isinstance(response, dict) and "message" in response and "content" in response["message"]:
response = response["message"]["content"]
@ -345,14 +344,14 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec
return response + "\n\n"
except Exception as e:
logger.error(f"Error generating auto-response: {str(e)}")
err(f"Error generating auto-response: {str(e)}")
return None
async def create_incoming_email(message) -> IncomingEmail:
recipients = [EmailContact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to]
localized_datetime = await loc.dt(message.date)
localized_datetime = await gis.dt(message.date)
return IncomingEmail(
sender=message.sent_from[0]['email'],
datetime_received=localized_datetime,
@ -372,7 +371,6 @@ async def save_processed_uid(filename: Path, account_name: str, uid: str):
async with aiofiles.open(filename, 'a') as f:
await f.write(f"{account_name}:{uid}\n")
async def process_all_accounts():
email_accounts = load_email_accounts(EMAIL_CONFIG)
summarization_tasks = [asyncio.create_task(process_account_archival(account)) for account in email_accounts]

568
sijapi/routers/gis.py Normal file
View file

@ -0,0 +1,568 @@
'''
Uses Postgres/PostGIS for location tracking (data obtained via the companion mobile Pythonista scripts), and for geocoding purposes.
'''
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import HTMLResponse, JSONResponse
import random
from pathlib import Path
import traceback
from datetime import datetime, timezone
from typing import Union, List
import folium
from folium.plugins import HeatMap, MarkerCluster, Search
from folium.plugins import Fullscreen, MiniMap, MousePosition, Geocoder, Draw, MeasureControl
from zoneinfo import ZoneInfo
from dateutil.parser import parse as dateutil_parse
from typing import Optional, List, Union
from sijapi import L, DB, TZ, GEO
from sijapi.classes import Location
from sijapi.utilities import haversine, assemble_journal_path
gis = APIRouter()
logger = L.get_module_logger("gis")


# Thin module-level shortcuts so call sites can log without naming the logger.
def debug(text: str):
    """Emit ``text`` on this module's logger at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Emit ``text`` on this module's logger at INFO level."""
    logger.info(text)


def warn(text: str):
    """Emit ``text`` on this module's logger at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Emit ``text`` on this module's logger at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Emit ``text`` on this module's logger at CRITICAL level."""
    logger.critical(text)
async def dt(
    date_time: Union[str, int, datetime],
    tz: Union[str, ZoneInfo, None] = None
) -> datetime:
    """Normalize ``date_time`` into a timezone-aware ``datetime``.

    :param date_time: An epoch timestamp (int or float, interpreted as UTC),
        a date/time string (parsed with ``dateutil``), or a ``datetime``.
        Naive datetimes are assumed to be UTC.
    :param tz: Optional target timezone. ``"local"`` resolves the timezone via
        ``get_timezone_without_timezone``; any other string is treated as an
        IANA zone key; a ``ZoneInfo`` is used as-is. When ``None`` the value is
        returned in whatever timezone it already carries.
    :return: A timezone-aware ``datetime``.
    :raises ValueError: If the input or the timezone cannot be interpreted.
    """
    try:
        # Convert epoch time to a UTC datetime
        if isinstance(date_time, (int, float)):
            epoch = date_time
            date_time = datetime.fromtimestamp(epoch, tz=timezone.utc)
            debug(f"Converted epoch time {epoch} to UTC datetime object.")

        # Convert string to datetime if necessary
        elif isinstance(date_time, str):
            raw = date_time
            date_time = dateutil_parse(raw)
            debug(f"Converted string '{raw}' to datetime object.")

        if not isinstance(date_time, datetime):
            raise ValueError(f"Input must be a string, integer (epoch time), or datetime object. What we received: {date_time}, type {type(date_time)}")

        # Ensure the datetime is timezone-aware (UTC if not specified)
        if date_time.tzinfo is None:
            date_time = date_time.replace(tzinfo=timezone.utc)
            debug("Added UTC timezone to naive datetime.")

        # Handle provided timezone
        if tz is not None:
            if isinstance(tz, str):
                if tz == "local":
                    # get_timezone_without_timezone already returns the timezone
                    # (not a location), so use it directly; it is a zone key
                    # string wherever this file passes it to ZoneInfo().
                    local_tz = await get_timezone_without_timezone(date_time)
                    tz = ZoneInfo(local_tz) if isinstance(local_tz, str) else local_tz
                    debug(f"Using local timezone: {tz}")
                else:
                    try:
                        tz = ZoneInfo(tz)
                    except Exception as e:
                        err(f"Invalid timezone string '{tz}'. Error: {e}")
                        raise ValueError(f"Invalid timezone string: {tz}") from e
            elif not isinstance(tz, ZoneInfo):
                raise ValueError(f"What we needed: tz == 'local', a string, or a ZoneInfo object. What we got: tz, a {type(tz)}, == {tz})")

            # Convert to the provided or determined timezone
            date_time = date_time.astimezone(tz)
            debug(f"Converted datetime to timezone: {tz}")

        return date_time

    except ValueError as e:
        err(f"Error in dt: {e}")
        raise
    except Exception as e:
        err(f"Unexpected error in dt: {e}")
        raise ValueError(f"Failed to process datetime: {e}") from e
async def get_timezone_without_timezone(date_time):
    """Best-effort guess of the timezone that applied at ``date_time``.

    :param date_time: A ``datetime`` whose own tzinfo is not trusted; its
        wall-clock fields are reinterpreted in UTC and then in the guessed zone.
    :return: The timezone value produced by ``GEO.tz_current``.
    :raises ValueError: If no stored location precedes ``date_time``, so there
        is nothing to derive a timezone from.
    """
    # This is a bit convoluted because we're trying to solve the paradox of needing to
    # know the location in order to determine the timezone, but needing the timezone to be
    # certain we've chosen the correct location for a provided timezone-naive datetime
    # (relevant, e.g., if this datetime coincided with inter-timezone travel).
    # Our imperfect solution is to use UTC for an initial location query to determine
    # roughly where we were at the time, get that timezone, then check the location again
    # applying that timezone to the provided datetime. If the location changed between the
    # datetime in UTC and the localized datetime, we'll use the new location's timezone;
    # otherwise we'll use the timezone we sourced from the UTC timezone query. But at the
    # end of the day it's entirely possible to spend the end of the day twice in two different
    # timezones (or none!), so this is a best-effort solution.

    # Step 1: Use UTC as an interim timezone to query location
    interim_dt = date_time.replace(tzinfo=ZoneInfo("UTC"))
    interim_loc = await fetch_last_location_before(interim_dt)
    if interim_loc is None:
        # fetch_last_location_before returns None when nothing is on record.
        raise ValueError(f"No location recorded before {interim_dt}; cannot determine a timezone.")

    # Step 2: Get a preliminary timezone based on the interim location
    # NOTE(review): this call passes a single (lat, lon) tuple while the call in
    # step 4 passes two positional arguments — confirm which form GEO.tz_current expects.
    interim_tz = await GEO.tz_current((interim_loc.latitude, interim_loc.longitude))

    # Step 3: Apply this preliminary timezone and query location again
    query_dt = date_time.replace(tzinfo=ZoneInfo(interim_tz))
    query_loc = await fetch_last_location_before(query_dt)

    # Step 4: Get the final timezone, reusing interim_tz if the location hasn't
    # changed (or if the second lookup found nothing to compare against)
    if query_loc is None or query_loc == interim_loc:
        return interim_tz
    return await GEO.tz_current(query_loc.latitude, query_loc.longitude)
async def get_last_location() -> Optional[Location]:
    """Return the last stored location before the current time in ``TZ``, or ``None``."""
    now = datetime.now(TZ)
    debug(f"Query_datetime: {now}")

    latest = await fetch_last_location_before(now)
    if not latest:
        return None

    debug(f"location: {latest}")
    return latest
async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int, datetime, None] = None) -> List[Location]:
    """Fetch stored locations between ``start`` and ``end``, newest first.

    When ``end`` is omitted the window runs to 23:59:59 on the start day. When
    both bounds fall exactly on midnight, the end is likewise pushed to
    23:59:59 of its day. If the window is empty and covers a single day (or no
    ``end`` was given), the single most recent record before ``start`` is
    returned instead. Rows lacking latitude or longitude are dropped.
    """
    start_datetime = await dt(start)
    if end is None:
        end_datetime = await dt(start_datetime.replace(hour=23, minute=59, second=59))
    else:
        end_datetime = end if isinstance(end, datetime) else await dt(end)
        midnight = datetime.min.time()
        if start_datetime.time() == midnight and end_datetime.time() == midnight:
            end_datetime = await dt(end_datetime.replace(hour=23, minute=59, second=59))

    debug(f"Fetching locations between {start_datetime} and {end_datetime}")

    # Queries are issued with the tzinfo stripped from both bounds.
    naive_start = start_datetime.replace(tzinfo=None)
    naive_end = end_datetime.replace(tzinfo=None)

    async with DB.get_connection() as conn:
        rows = []

        # Records inside the requested window
        in_range = await conn.fetch('''
            SELECT id, datetime,
            ST_X(ST_AsText(location)::geometry) AS longitude,
            ST_Y(ST_AsText(location)::geometry) AS latitude,
            ST_Z(ST_AsText(location)::geometry) AS elevation,
            city, state, zip, street,
            action, device_type, device_model, device_name, device_os
            FROM locations
            WHERE datetime >= $1 AND datetime <= $2
            ORDER BY datetime DESC
            ''', naive_start, naive_end)
        debug(f"Range locations query returned: {in_range}")
        rows.extend(in_range)

        single_day = end is None or start_datetime.date() == end_datetime.date()
        if not rows and single_day:
            # Nothing in the window: fall back to the latest record before it
            previous = await conn.fetchrow('''
                SELECT id, datetime,
                ST_X(ST_AsText(location)::geometry) AS longitude,
                ST_Y(ST_AsText(location)::geometry) AS latitude,
                ST_Z(ST_AsText(location)::geometry) AS elevation,
                city, state, zip, street,
                action, device_type, device_model, device_name, device_os
                FROM locations
                WHERE datetime < $1
                ORDER BY datetime DESC
                LIMIT 1
                ''', naive_start)
            debug(f"Fallback query returned: {previous}")
            if previous:
                rows.append(previous)

    debug(f"Locations found: {rows}")

    # Newest first, then convert each usable row into a Location
    results = []
    for row in sorted(rows, key=lambda x: x['datetime'], reverse=True):
        if row['latitude'] is None or row['longitude'] is None:
            continue
        device_context = {
            'action': row.get('action'),
            'device_type': row.get('device_type'),
            'device_model': row.get('device_model'),
            'device_name': row.get('device_name'),
            'device_os': row.get('device_os')
        }
        results.append(Location(
            latitude=row['latitude'],
            longitude=row['longitude'],
            datetime=row['datetime'],
            elevation=row.get('elevation'),
            city=row.get('city'),
            state=row.get('state'),
            zip=row.get('zip'),
            street=row.get('street'),
            context=device_context
        ))
    return results
# Function to fetch the last location before the specified datetime
async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
    """Return the most recent stored location strictly before ``datetime``.

    :param datetime: The cutoff moment; it is normalized with ``dt`` and then
        compared with its tzinfo stripped. Note that this parameter name
        shadows the ``datetime`` class inside the function body.
    :return: A ``Location`` built from the matching row, or ``None`` when no
        earlier record exists.
    """
    cutoff = await dt(datetime)

    debug(f"Fetching last location before {cutoff}")

    # The context manager owns the connection's lifecycle, so it is not closed
    # explicitly here (matching the other queries in this module).
    async with DB.get_connection() as conn:
        location_data = await conn.fetchrow('''
            SELECT id, datetime,
            ST_X(ST_AsText(location)::geometry) AS longitude,
            ST_Y(ST_AsText(location)::geometry) AS latitude,
            ST_Z(ST_AsText(location)::geometry) AS elevation,
            city, state, zip, street, country,
            action
            FROM locations
            WHERE datetime < $1
            ORDER BY datetime DESC
            LIMIT 1
            ''', cutoff.replace(tzinfo=None))

    if location_data:
        debug(f"Last location found: {location_data}")
        return Location(**location_data)

    debug("No location found before the specified datetime")
    return None
@gis.get("/map", response_class=HTMLResponse)
async def generate_map_endpoint(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    max_points: int = Query(32767, description="Maximum number of points to display")
):
    """Serve an HTML map of stored locations.

    Both ``start_date`` and ``end_date`` must be supplied for them to be used;
    otherwise the range falls back to whatever ``get_date_range`` reports.
    Responds with HTTP 400 when a supplied date cannot be parsed.
    """
    try:
        if not (start_date and end_date):
            start_date, end_date = await get_date_range()
        else:
            start_date = await dt(start_date)
            end_date = await dt(end_date)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid date format")

    info(f"Generating map for {start_date} to {end_date}")
    rendered = await generate_map(start_date, end_date, max_points)
    return HTMLResponse(content=rendered)
async def get_date_range():
    """Return ``(earliest, latest)`` datetimes present in the ``locations`` table.

    Falls back to ``(datetime(2022, 1, 1), datetime.now())`` when either bound
    is missing.
    """
    async with DB.get_connection() as conn:
        bounds = await conn.fetchrow(
            "SELECT MIN(datetime) as min_date, MAX(datetime) as max_date FROM locations"
        )
        if not (bounds and bounds['min_date'] and bounds['max_date']):
            return datetime(2022, 1, 1), datetime.now()
        return bounds['min_date'], bounds['max_date']
async def generate_and_save_heatmap(
    start_date: Union[str, int, datetime],
    end_date: Optional[Union[str, int, datetime]] = None,
    output_path: Optional[Path] = None
) -> Path:
    """
    Build a Folium heatmap of the locations in a date range and save it to disk.

    :param start_date: Start of the range (or the only day when end_date is omitted)
    :param end_date: End of the range; defaults to 23:59:59 on the start day
    :param output_path: Destination file; when omitted, a journal path named "map" with a ".png" extension is assembled from end_date
    :return: The path the map was saved to
    :raises ValueError: If no locations exist for the range

    NOTE(review): the file is written with Folium's Map.save(), which per the
    Folium docs emits an HTML document — so the ".png" file presumably holds
    HTML rather than image data. Confirm and decide whether real PNG rendering
    is wanted.
    """
    try:
        start_date = await dt(start_date)
        end_date = await dt(end_date) if end_date else start_date.replace(hour=23, minute=59, second=59)

        locations = await fetch_locations(start_date, end_date)
        if not locations:
            raise ValueError("No locations found for the given date range")

        heat_map = folium.Map()

        # Coordinates drive both the heat layer and the viewport bounds
        lats = [loc.latitude for loc in locations]
        lons = [loc.longitude for loc in locations]
        HeatMap([list(pair) for pair in zip(lats, lons)]).add_to(heat_map)
        heat_map.fit_bounds([[min(lats), min(lons)], [max(lats), max(lons)]])

        if output_path is None:
            output_path, _ = assemble_journal_path(end_date, filename="map", extension=".png", no_timestamp=True)

        heat_map.save(str(output_path))

        info(f"Heatmap saved as PNG: {output_path}")
        return output_path

    except Exception as e:
        err(f"Error generating and saving heatmap: {str(e)}")
        raise
async def generate_map(start_date: datetime, end_date: datetime, max_points: int):
    """Render an interactive Folium map of the locations in a date range.

    The map carries several base tile layers, a heatmap, clustered markers with
    popups, drawing/measuring tools, search and a layer switcher.

    :param start_date: Start of the range passed to ``fetch_locations``.
    :param end_date: End of the range passed to ``fetch_locations``.
    :param max_points: Upper bound on plotted points; larger result sets are
        randomly down-sampled to this size.
    :return: The rendered HTML document as a string.
    :raises HTTPException: 404 when no locations exist for the range, 400 when
        ``max_points`` is less than 1.
    """
    locations = await fetch_locations(start_date, end_date)
    if not locations:
        raise HTTPException(status_code=404, detail="No locations found for the given date range")

    info(f"Found {len(locations)} locations for the given date range")

    # Sampling zero points would make the centre computation divide by zero,
    # and a negative sample size is rejected by random.sample.
    if max_points < 1:
        raise HTTPException(status_code=400, detail="max_points must be at least 1")

    if len(locations) > max_points:
        locations = random.sample(locations, max_points)

    map_center = [sum(loc.latitude for loc in locations) / len(locations),
                  sum(loc.longitude for loc in locations) / len(locations)]
    m = folium.Map(location=map_center, zoom_start=5)

    folium.TileLayer('openstreetmap', name='OpenStreetMap').add_to(m)
    folium.TileLayer(
        tiles='https://basemap.nationalmap.gov/arcgis/rest/services/USGSTopo/MapServer/tile/{z}/{y}/{x}',
        attr='USGS The National Map',
        name='USGS Topo'
    ).add_to(m)
    folium.TileLayer(
        tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}',
        attr='Esri',
        name='Esri World Topo'
    ).add_to(m)
    folium.TileLayer('cartodbdark_matter', name='Dark Mode').add_to(m)

    # Single drawing toolbar (shape creation only, editing disabled)
    draw = Draw(
        draw_options={
            'polygon': True,
            'rectangle': True,
            'circle': True,
            'marker': True,
            'circlemarker': False,
        },
        edit_options={'edit': False}
    )
    draw.add_to(m)

    MeasureControl(
        position='topright',
        primary_length_unit='kilometers',
        secondary_length_unit='miles',
        primary_area_unit='sqmeters',
        secondary_area_unit='acres'
    ).add_to(m)

    # NOTE(review): this script refers to `map`, `markerCluster` and `turf`,
    # none of which are defined in this function — presumably they must match
    # the JS variable names Folium generates / an externally loaded Turf.js.
    # Verify in the browser console that the handler actually runs.
    m.get_root().html.add_child(folium.Element("""
    <script>
    var drawnItems = new L.FeatureGroup();
    map.addLayer(drawnItems);
    map.on(L.Draw.Event.CREATED, function (event) {
    var layer = event.layer;
    drawnItems.addLayer(layer);
    var shape = layer.toGeoJSON();
    var points = [];
    markerCluster.eachLayer(function (marker) {
    if (turf.booleanPointInPolygon(marker.toGeoJSON(), shape)) {
    points.push(marker.getLatLng());
    }
    });
    if (points.length > 0) {
    alert('Selected ' + points.length + ' points');
    console.log(points);
    }
    });
    </script>
    """))

    # Add marker cluster
    marker_cluster = MarkerCluster(name="Markers").add_to(m)

    # Prepare data for heatmap
    heat_data = [[loc.latitude, loc.longitude] for loc in locations]

    # Add heatmap
    HeatMap(heat_data, name="Heatmap").add_to(m)

    # Add markers to cluster
    for location in locations:
        popup_content = f"""
        {location.city}, {location.state}<br>
        Elevation: {location.elevation}m<br>
        Date: {location.datetime}<br>
        Action: {location.context.get('action', 'N/A')}<br>
        Device: {location.context.get('device_name', 'N/A')} ({location.context.get('device_model', 'N/A')})
        """
        folium.Marker(
            location=[location.latitude, location.longitude],
            popup=popup_content,
            tooltip=f"{location.city}, {location.state}"
        ).add_to(marker_cluster)

    # Add controls (the Draw toolbar was already added above; adding a second
    # default Draw() here would render a duplicate toolbar)
    Fullscreen().add_to(m)
    MiniMap().add_to(m)
    MousePosition().add_to(m)
    Geocoder().add_to(m)

    # Add search functionality
    Search(
        layer=marker_cluster,
        geom_type='Point',
        placeholder='Search for a location',
        collapsed=False,
        search_label='city'
    ).add_to(m)

    # Add layer control
    folium.LayerControl().add_to(m)

    return m.get_root().render()
async def post_location(location: Location):
    """Insert ``location`` into the ``locations`` table.

    Device/action metadata is read from ``location.context`` (defaulting to
    ``'manual'`` for the action and ``'Unknown'`` for device fields), and the
    timestamp is normalized with ``dt`` before insertion.

    :param location: The location record to store.
    :return: A dict describing the stored row, or ``None`` if the insert failed
        (the error and traceback are logged rather than raised).
    """
    # The context manager owns the connection's lifecycle, so it is not closed
    # explicitly here (matching the read queries in this module).
    async with DB.get_connection() as conn:
        try:
            context = location.context or {}
            action = context.get('action', 'manual')
            device_type = context.get('device_type', 'Unknown')
            device_model = context.get('device_model', 'Unknown')
            device_name = context.get('device_name', 'Unknown')
            device_os = context.get('device_os', 'Unknown')

            # Parse and localize the datetime
            localized_datetime = await dt(location.datetime)

            # $2-$4 feed ST_MakePoint as (longitude, latitude, elevation).
            await conn.execute('''
                INSERT INTO locations (
                datetime, location, city, state, zip, street, action, device_type, device_model, device_name, device_os,
                class_, type, name, display_name, amenity, house_number, road, quarter, neighbourhood,
                suburb, county, country_code, country
                )
                VALUES ($1, ST_SetSRID(ST_MakePoint($2, $3, $4), 4326), $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
                $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26)
                ''', localized_datetime, location.longitude, location.latitude, location.elevation, location.city, location.state,
                location.zip, location.street, action, device_type, device_model, device_name, device_os,
                location.class_, location.type, location.name, location.display_name,
                location.amenity, location.house_number, location.road, location.quarter, location.neighbourhood,
                location.suburb, location.county, location.country_code, location.country)

            info(f"Successfully posted location: {location.latitude}, {location.longitude}, {location.elevation} on {localized_datetime}")
            return {
                'datetime': localized_datetime,
                'latitude': location.latitude,
                'longitude': location.longitude,
                'elevation': location.elevation,
                'city': location.city,
                'state': location.state,
                'zip': location.zip,
                'street': location.street,
                'action': action,
                'device_type': device_type,
                'device_model': device_model,
                'device_name': device_name,
                'device_os': device_os,
                'class_': location.class_,
                'type': location.type,
                'name': location.name,
                'display_name': location.display_name,
                'amenity': location.amenity,
                'house_number': location.house_number,
                'road': location.road,
                'quarter': location.quarter,
                'neighbourhood': location.neighbourhood,
                'suburb': location.suburb,
                'county': location.county,
                'country_code': location.country_code,
                'country': location.country
            }
        except Exception as e:
            # Best effort: report the failure to the caller via None.
            err(f"Error posting location {e}")
            err(traceback.format_exc())
            return None
@gis.post("/locate")
async def post_locate_endpoint(locations: Union[Location, List[Location]]):
    """Accept one or more locations, fill in defaults, geocode and store them.

    Entries without a datetime get the current time in the timezone looked up
    for their coordinates; entries without a context get a placeholder one.
    The response lists the rows that ``post_location`` reported as stored.
    """
    if isinstance(locations, Location):
        locations = [locations]

    placeholder_context = {
        "action": "missing",
        "device_type": "API",
        "device_model": "Unknown",
        "device_name": "Unknown",
        "device_os": "Unknown"
    }

    # Prepare locations
    for lcn in locations:
        if not lcn.datetime:
            tz = await GEO.tz_at(lcn.latitude, lcn.longitude)
            lcn.datetime = datetime.now(ZoneInfo(tz)).isoformat()

        if not lcn.context:
            # Fresh copy per entry so records never share one dict
            lcn.context = dict(placeholder_context)
        debug(f"Location received for processing: {lcn}")

    geocoded_locations = await GEO.code(locations)

    # GEO.code may hand back a single item; treat both shapes uniformly
    pending = geocoded_locations if isinstance(geocoded_locations, List) else [geocoded_locations]

    responses = []
    for candidate in pending:
        debug(f"Final location to be submitted to database: {candidate}")
        location_entry = await post_location(candidate)
        if not location_entry:
            warn(f"Posting location to database appears to have failed.")
            continue
        responses.append({"location_data": location_entry})

    return {"message": "Locations and weather updated", "results": responses}
@gis.get("/locate", response_model=Location)
async def get_last_location_endpoint() -> JSONResponse:
    """Return the most recent stored location as JSON, or HTTP 404 if there is none."""
    this_location = await get_last_location()
    if not this_location:
        raise HTTPException(status_code=404, detail="No location found before the specified datetime")

    payload = this_location.model_dump()
    # Replace the datetime with its ISO-8601 string form before serializing
    payload["datetime"] = this_location.datetime.isoformat()
    return JSONResponse(content=payload)
@gis.get("/locate/{datetime_str}", response_model=List[Location])
async def get_locate(datetime_str: str, all: bool = False):
    """Return the location(s) recorded for the given date/time.

    :param datetime_str: Date/time string understood by ``dt``.
    :param all: When true, return every location ``fetch_locations`` finds;
        otherwise only the first (most recent) one.
    :raises HTTPException: 400 if ``datetime_str`` cannot be parsed, 404 if no
        location data is found.
    """
    try:
        date_time = await dt(datetime_str)
    except ValueError as e:
        err(f"Invalid datetime string provided: {datetime_str}")
        # A list of strings would not satisfy response_model=List[Location],
        # so report the bad input as a client error instead.
        raise HTTPException(
            status_code=400,
            detail="Invalid datetime provided. Use YYYYMMDDHHmmss or YYYYMMDD format."
        ) from e

    locations = await fetch_locations(date_time)
    if not locations:
        raise HTTPException(status_code=404, detail="No nearby data found for this date and time")

    return locations if all else [locations[0]]

View file

@ -12,6 +12,11 @@ from sijapi import L, API, TS_ID, SUBNET_BROADCAST
health = APIRouter(tags=["public", "trusted", "private"])
logger = L.get_module_logger("health")
# Thin module-level shortcuts so call sites can log without naming the logger.
def debug(text: str):
    """Emit ``text`` on this module's logger at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Emit ``text`` on this module's logger at INFO level."""
    logger.info(text)


def warn(text: str):
    """Emit ``text`` on this module's logger at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Emit ``text`` on this module's logger at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Emit ``text`` on this module's logger at CRITICAL level."""
    logger.critical(text)
@health.get("/health")
def get_health():
@ -49,7 +54,7 @@ async def get_wan_ip():
wan_info = response.json()
return wan_info.get('ip', 'Unavailable')
except Exception as e:
logger.error(f"Error fetching WAN IP: {e}")
err(f"Error fetching WAN IP: {e}")
return "Unavailable"
@health.get("/ts_ip")

View file

@ -42,6 +42,11 @@ import base64
ig = APIRouter()
logger = L.get_module_logger("ig")
# Thin module-level shortcuts so call sites can log without naming the logger.
def debug(text: str):
    """Emit ``text`` on this module's logger at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Emit ``text`` on this module's logger at INFO level."""
    logger.info(text)


def warn(text: str):
    """Emit ``text`` on this module's logger at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Emit ``text`` on this module's logger at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Emit ``text`` on this module's logger at CRITICAL level."""
    logger.critical(text)
class IG_Request(BaseModel):
file: Optional[UploadFile] = None # upload a particular file to Instagram

View file

@ -34,6 +34,12 @@ from sijapi import API, L, COMFYUI_URL, COMFYUI_OUTPUT_DIR, IMG_CONFIG_PATH, IMG
img = APIRouter()
logger = L.get_module_logger("img")
# Thin module-level shortcuts so call sites can log without naming the logger.
def debug(text: str):
    """Emit ``text`` on this module's logger at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Emit ``text`` on this module's logger at INFO level."""
    logger.info(text)


def warn(text: str):
    """Emit ``text`` on this module's logger at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Emit ``text`` on this module's logger at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Emit ``text`` on this module's logger at CRITICAL level."""
    logger.critical(text)
CLIENT_ID = str(uuid.uuid4())
@img.post("/img")
@ -79,12 +85,12 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
scene_workflow = random.choice(scene_data['workflows'])
if size:
logger.debug(f"Specified size: {size}")
debug(f"Specified size: {size}")
size = size if size else scene_workflow.get('size', '1024x1024')
width, height = map(int, size.split('x'))
logger.debug(f"Parsed width: {width}; parsed height: {height}")
debug(f"Parsed width: {width}; parsed height: {height}")
workflow_path = Path(IMG_WORKFLOWS_DIR) / scene_workflow['workflow']
workflow_data = json.loads(workflow_path.read_text())
@ -98,22 +104,22 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
}
saved_file_key = await update_prompt_and_get_key(workflow=workflow_data, post=post, positive=image_concept)
print(f"Saved file key: {saved_file_key}")
info(f"Saved file key: {saved_file_key}")
prompt_id = await queue_prompt(workflow_data)
print(f"Prompt ID: {prompt_id}")
info(f"Prompt ID: {prompt_id}")
max_size = max(width, height) if downscale_to_fit else None
destination_path = Path(destination_path).with_suffix(".jpg") if destination_path else IMG_DIR / f"{prompt_id}.jpg"
if earlyout:
asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path))
logger.debug(f"Returning {destination_path}")
debug(f"Returning {destination_path}")
return destination_path
else:
await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)
logger.debug(f"Returning {destination_path}")
debug(f"Returning {destination_path}")
return destination_path
@ -124,10 +130,10 @@ async def generate_and_save_image(prompt_id, saved_file_key, max_size, destinati
jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path)
if Path(jpg_file_path) != Path(destination_path):
logger.error(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}")
err(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}")
except Exception as e:
print(f"Error in generate_and_save_image: {e}")
err(f"Error in generate_and_save_image: {e}")
return None
@ -149,7 +155,7 @@ async def poll_status(prompt_id):
status_data = await response.json()
job_data = status_data.get(prompt_id, {})
if job_data.get("status", {}).get("completed", False):
print(f"{prompt_id} completed in {elapsed_time} seconds.")
info(f"{prompt_id} completed in {elapsed_time} seconds.")
return job_data
await asyncio.sleep(1)
@ -200,7 +206,7 @@ async def save_as_jpg(image_data, prompt_id, max_size = None, quality = 100, des
return str(destination_path_jpg)
except Exception as e:
print(f"Error processing image: {e}")
err(f"Error processing image: {e}")
return None
@ -216,11 +222,11 @@ def set_presets(workflow_data, preset_values):
if 'inputs' in workflow_data.get(preset_node, {}):
workflow_data[preset_node]['inputs'][preset_key] = preset_value
else:
logger.debug("Node not found in workflow_data")
debug("Node not found in workflow_data")
else:
logger.debug("Required data missing in preset_values")
debug("Required data missing in preset_values")
else:
logger.debug("No preset_values found")
debug("No preset_values found")
def get_return_path(destination_path):
@ -235,7 +241,7 @@ def get_scene(scene):
IMG_CONFIG = yaml.safe_load(IMG_CONFIG_file)
for scene_data in IMG_CONFIG['scenes']:
if scene_data['scene'] == scene:
logger.debug(f"Found scene for \"{scene}\".")
debug(f"Found scene for \"{scene}\".")
return scene_data
return None
@ -254,11 +260,11 @@ def get_matching_scene(prompt):
max_count = count
scene_data = sc
if scene_data:
logger.debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!")
debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!")
if scene_data:
return scene_data
else:
logger.debug(f"No matching scenes found, falling back to default scene.")
debug(f"No matching scenes found, falling back to default scene.")
return IMG_CONFIG['scenes'][0]
@ -282,11 +288,11 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
for attempt in range(retries):
try:
with socket.create_connection(("127.0.0.1", 8188), timeout=2):
print("ComfyUI is already running.")
info("ComfyUI is already running.")
return
except (socket.timeout, ConnectionRefusedError):
if attempt == 0: # Only try to start ComfyUI on the first failed attempt
print("ComfyUI is not running. Starting it now...")
warn("ComfyUI is not running. Starting it now...")
try:
tmux_command = (
"tmux split-window -h "
@ -295,13 +301,14 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
"python main.py; exec $SHELL\""
)
subprocess.Popen(tmux_command, shell=True)
print("ComfyUI started in a new tmux session.")
info("ComfyUI started in a new tmux session.")
except Exception as e:
raise RuntimeError(f"Error starting ComfyUI: {e}")
print(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...")
warn(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...")
await asyncio.sleep(timeout)
crit(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
raise RuntimeError(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
# async def upload_and_get_shareable_link(image_path):
@ -326,10 +333,10 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
# shareable_link = f"https://{PHOTOPRISM_URL}/p/{photo_uuid}"
# return shareable_link
# else:
# logger.error("Could not find the uploaded photo details.")
# err("Could not find the uploaded photo details.")
# return None
# except Exception as e:
# logger.error(f"Error in upload_and_get_shareable_link: {e}")
# err(f"Error in upload_and_get_shareable_link: {e}")
# return None
@ -405,7 +412,7 @@ async def load_workflow(workflow_path: str, workflow:str):
return json.load(file)
async def update_prompt_and_get_key(workf0ow: dict, post: dict, positive: str):
async def update_prompt_and_get_key(workflow: dict, post: dict, positive: str):
'''
Recurses through the workflow searching for and substituting the dynamic values for API_PrePrompt, API_StylePrompt, API_NegativePrompt, width, height, and seed (random integer).
Even more important, it finds and returns the key to the filepath where the file is saved, which we need to decipher status when generation is complete.
@ -436,13 +443,13 @@ Even more important, it finds and returns the key to the filepath where the file
workflow[key] = random.randint(1000000000000, 9999999999999)
elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]:
logger.debug(f"Got a hit for a dimension: {key} {value}")
debug(f"Got a hit for a dimension: {key} {value}")
if value == 1023:
workflow[key] = post.get("width", 1024)
logger.debug(f"Set {key} to {workflow[key]}.")
debug(f"Set {key} to {workflow[key]}.")
elif value == 1025:
workflow[key] = post.get("height", 1024)
logger.debug(f"Set {key} to {workflow[key]}.")
debug(f"Set {key} to {workflow[key]}.")
update_recursive(workflow)
return found_key[0]

View file

@ -33,10 +33,15 @@ from sijapi.routers.asr import transcribe_audio
llm = APIRouter()
logger = L.get_module_logger("llm")
# Thin wrappers around this module's logger so call sites can log with a
# single short call instead of going through ``logger`` directly.
def debug(text: str):
    """Log *text* at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Log *text* at INFO level."""
    logger.info(text)


def warn(text: str):
    """Log *text* at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Log *text* at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Log *text* at CRITICAL level."""
    logger.critical(text)
# Initialize chromadb client
client = chromadb.Client()
OBSIDIAN_CHROMADB_COLLECTION = client.create_collection("obsidian")
# OBSIDIAN_CHROMADB_COLLECTION = client.create_collection("obsidian")
VISION_MODELS = ["llava-phi3", "moondream", "llava", "llava-llama3", "llava:34b", "llava:13b-v1.5-q8_0"]
# Function to read all markdown files in the folder
@ -48,6 +53,7 @@ def read_markdown_files(folder: Path):
documents.append(file.read())
return documents, file_paths
reimplement='''
# Read markdown files and generate embeddings
documents, file_paths = read_markdown_files(DOC_DIR)
for i, doc in enumerate(documents):
@ -57,7 +63,7 @@ for i, doc in enumerate(documents):
ids=[file_paths[i]],
embeddings=[embedding],
documents=[doc]
)
)'''
# Function to retrieve the most relevant document given a prompt
@llm.get("/retrieve_document/{prompt}")
@ -89,13 +95,13 @@ async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = DEFAULT_LL
LLM = Ollama()
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
logger.debug(response)
debug(response)
if "message" in response:
if "content" in response["message"]:
content = response["message"]["content"]
return content
else:
logger.debug("No choices found in response")
debug("No choices found in response")
return None
async def query_ollama_multishot(
@ -116,12 +122,12 @@ async def query_ollama_multishot(
LLM = Ollama()
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
logger.debug(response)
debug(response)
if "message" in response and "content" in response["message"]:
return response["message"]["content"]
else:
logger.debug("No content found in response")
debug("No content found in response")
return None
@ -140,21 +146,21 @@ async def chat_completions(request: Request):
raise HTTPException(status_code=400, detail="Message data is required in the request body.")
requested_model = body.get('model', 'default-model')
logger.debug(f"Requested model: {requested_model}")
debug(f"Requested model: {requested_model}")
stream = body.get('stream')
token_limit = body.get('max_tokens') or body.get('num_predict')
# Check if the most recent message contains an image_url
recent_message = messages[-1]
if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
logger.debug("Processing as a vision request")
debug("Processing as a vision request")
model = "llava"
logger.debug(f"Using model: {model}")
debug(f"Using model: {model}")
return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json")
else:
logger.debug("Processing as a standard request")
debug("Processing as a standard request")
model = requested_model
logger.debug(f"Using model: {model}")
debug(f"Using model: {model}")
if stream:
return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json")
else:
@ -279,17 +285,17 @@ async def generate_messages(messages: list, model: str = "llama3"):
def is_model_available(model_name):
model_data = OllamaList()
available_models = [model['name'] for model in model_data['models']]
logger.debug(f"Available models: {available_models}") # Log using the configured LOGGER
debug(f"Available models: {available_models}") # Log using the configured LOGGER
matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name]
if len(matching_models) == 1:
logger.debug(f"Unique match found: {matching_models[0]}")
debug(f"Unique match found: {matching_models[0]}")
return True
elif len(matching_models) > 1:
logger.error(f"Ambiguous match found, models: {matching_models}")
err(f"Ambiguous match found, models: {matching_models}")
return True
else:
logger.error(f"No match found for model: {model_name}")
err(f"No match found for model: {model_name}")
return False
@ -412,12 +418,12 @@ def query_gpt4(llmPrompt: List = [], system_msg: str = "", user_msg: str = "", m
if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
return first_choice.message.content
else:
logger.debug("No content attribute in the first choice's message")
logger.debug(f"No content found in message string: {response.choices}")
logger.debug("Trying again!")
debug("No content attribute in the first choice's message")
debug(f"No content found in message string: {response.choices}")
debug("Trying again!")
query_gpt4(messages, max_tokens)
else:
logger.debug(f"No content found in message string: {response}")
debug(f"No content found in message string: {response}")
return ""
def llava(image_base64, prompt):
@ -427,7 +433,7 @@ def llava(image_base64, prompt):
prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
images = [image_base64]
)
logger.debug(response)
debug(response)
return "" if "pass" in response["response"].lower() else response["response"]
def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
@ -458,7 +464,7 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
comment_content = first_choice.message.content
if "PASS" in comment_content:
return ""
logger.debug(f"Generated comment: {comment_content}")
debug(f"Generated comment: {comment_content}")
response_2 = VISION_LLM.chat.completions.create(
model="gpt-4-vision-preview",
@ -496,15 +502,15 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
first_choice = response_2.choices[0]
if first_choice.message and first_choice.message.content:
final_content = first_choice.message.content
logger.debug(f"Generated comment: {final_content}")
debug(f"Generated comment: {final_content}")
if "PASS" in final_content:
return ""
else:
return final_content
logger.debug("Vision response did not contain expected data.")
logger.debug(f"Vision response: {response_1}")
debug("Vision response did not contain expected data.")
debug(f"Vision response: {response_1}")
asyncio.sleep(15)
try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
@ -562,7 +568,7 @@ async def summarize_tts_endpoint(
)
except Exception as e:
logger.error(f"Error in summarize_tts_endpoint: {str(e)}")
err(f"Error in summarize_tts_endpoint: {str(e)}")
return JSONResponse(
status_code=400,
content={"error": str(e)}
@ -589,11 +595,11 @@ async def summarize_tts(
bg_tasks = BackgroundTasks()
model = await tts.get_model(voice)
final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename)
logger.debug(f"summary_tts completed with final_output_path: {final_output_path}")
debug(f"summary_tts completed with final_output_path: {final_output_path}")
return final_output_path
async def get_title(text: str, LLM: Ollama() = None):
async def get_title(text: str, LLM = None):
LLM = LLM if LLM else Ollama()
title = await process_chunk("Generate a title for this text", text, 1, 1, 12, LLM)
title = sanitize_filename(title)
@ -605,10 +611,10 @@ def split_text_into_chunks(text: str) -> List[str]:
sentences = re.split(r'(?<=[.!?])\s+', text)
words = text.split()
total_words = len(words)
logger.debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.")
debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.")
max_words_per_chunk = int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW)
logger.debug(f"Maximum words per chunk: {max_words_per_chunk}")
debug(f"Maximum words per chunk: {max_words_per_chunk}")
chunks = []
current_chunk = []
@ -628,7 +634,7 @@ def split_text_into_chunks(text: str) -> List[str]:
if current_chunk:
chunks.append(' '.join(current_chunk))
logger.debug(f"Split text into {len(chunks)} chunks.")
debug(f"Split text into {len(chunks)} chunks.")
return chunks
@ -640,11 +646,11 @@ def calculate_max_tokens(text: str) -> int:
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str:
logger.info(f"Attempting to extract text from file: {file}")
info(f"Attempting to extract text from file: {file}")
try:
if isinstance(file, UploadFile):
logger.info("File is an UploadFile object")
info("File is an UploadFile object")
file_extension = os.path.splitext(file.filename)[1]
temp_file_path = tempfile.mktemp(suffix=file_extension)
with open(temp_file_path, 'wb') as buffer:
@ -663,7 +669,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
_, file_ext = os.path.splitext(file_path)
file_ext = file_ext.lower()
logger.info(f"File extension: {file_ext}")
info(f"File extension: {file_ext}")
if file_ext == '.pdf':
text_content = await extract_text_from_pdf(file_path)
@ -690,7 +696,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
return text_content
except Exception as e:
logger.error(f"Error extracting text: {str(e)}")
err(f"Error extracting text: {str(e)}")
raise ValueError(f"Error extracting text: {str(e)}")
@ -699,17 +705,17 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_
chunked_text = split_text_into_chunks(text)
total_parts = len(chunked_text)
logger.debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}")
debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}")
total_words_count = sum(len(chunk.split()) for chunk in chunked_text)
logger.debug(f"Total words count: {total_words_count}")
debug(f"Total words count: {total_words_count}")
total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW))
logger.debug(f"Total tokens count: {total_tokens_count}")
debug(f"Total tokens count: {total_tokens_count}")
total_summary_length = length_override if length_override else total_tokens_count // length_quotient
logger.debug(f"Total summary length: {total_summary_length}")
debug(f"Total summary length: {total_summary_length}")
corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
logger.debug(f"Corrected total summary length: {corrected_total_summary_length}")
debug(f"Corrected total summary length: {corrected_total_summary_length}")
summaries = await asyncio.gather(*[
process_chunk(instruction, chunk, i+1, total_parts, LLM=LLM)
@ -720,21 +726,21 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_
summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)]
concatenated_summary = ' '.join(summaries)
logger.debug(f"Concatenated summary: {concatenated_summary}")
logger.debug(f"Concatenated summary length: {len(concatenated_summary.split())}")
debug(f"Concatenated summary: {concatenated_summary}")
debug(f"Concatenated summary length: {len(concatenated_summary.split())}")
if total_parts > 1:
logger.debug(f"Processing the concatenated_summary to smooth the edges...")
debug(f"Processing the concatenated_summary to smooth the edges...")
concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts."
final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM)
logger.debug(f"Final summary length: {len(final_summary.split())}")
debug(f"Final summary length: {len(final_summary.split())}")
return final_summary
else:
return concatenated_summary
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str:
# logger.debug(f"Processing chunk: {text}")
# debug(f"Processing chunk: {text}")
LLM = LLM if LLM else Ollama()
words_count = len(text.split())
@ -744,14 +750,14 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE)
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH)
logger.debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
if part and total_parts > 1:
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
else:
prompt = f"{instruction}:\n\n{text}"
logger.debug(f"Starting LLM.generate for part {part} of {total_parts}")
info(f"Starting LLM.generate for part {part} of {total_parts}")
response = await LLM.generate(
model=SUMMARY_MODEL,
prompt=prompt,
@ -760,8 +766,8 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
)
text_response = response['response']
logger.debug(f"Completed LLM.generate for part {part} of {total_parts}")
logger.debug(f"Result: {text_response}")
info(f"Completed LLM.generate for part {part} of {total_parts}")
debug(f"Result: {text_response}")
return text_response
async def title_and_summary(extracted_text: str):

View file

@ -261,11 +261,10 @@ async def generate_map(start_date: datetime, end_date: datetime):
return html_content
async def post_location(location: Location):
if not location.datetime:
logger.debug(f"location appears to be missing datetime: {location}")
else:
logger.debug(f"post_location called with {location.datetime}")
# if not location.datetime:
# logger.debug(f"location appears to be missing datetime: {location}")
# else:
# logger.debug(f"post_location called with {location.datetime}")
async with DB.get_connection() as conn:
try:
context = location.context or {}

View file

@ -5,159 +5,178 @@ import asyncio
import shutil
import requests
from bs4 import BeautifulSoup
from zoneinfo import ZoneInfo
from urllib.parse import urlparse
from datetime import datetime as dt_datetime, timedelta
from typing import Optional
import aiohttp
import aiofiles
import newspaper
import trafilatura
from newspaper import Article
from readability import Document
from markdownify import markdownify as md
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from fastapi import APIRouter, BackgroundTasks, File, UploadFile, Form, HTTPException, Response, Query, Path as FastAPIPath
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from fastapi import APIRouter, BackgroundTasks, UploadFile, Form, HTTPException, Query, Path as FastAPIPath
from pathlib import Path
from sijapi import API, L, Dir, News, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, GEO
from sijapi import L, News, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, DEFAULT_11L_VOICE, DEFAULT_VOICE
from sijapi.utilities import sanitize_filename, assemble_journal_path, assemble_archive_path
from sijapi.routers import llm, tts, asr, loc, note
from sijapi.routers import gis, llm, tts, note
from newspaper import Article
news = APIRouter()
logger = L.get_module_logger("news")
# Thin wrappers around this module's logger so call sites can log with a
# single short call instead of going through ``logger`` directly.
def debug(text: str):
    """Log *text* at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Log *text* at INFO level."""
    logger.info(text)


def warn(text: str):
    """Log *text* at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Log *text* at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Log *text* at CRITICAL level."""
    logger.critical(text)
async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "summary", voice: str = DEFAULT_11L_VOICE):
async def process_and_save_article(
bg_tasks: BackgroundTasks,
url: str,
title: Optional[str] = None,
tts_mode: str = "summary",
voice: str = DEFAULT_VOICE,
site_name: Optional[str] = None
) -> str:
try:
url = article.url
source = trafilatura.fetch_url(url)
# Fetch and parse article
article = await fetch_and_parse_article(url)
if source is None:
# Fallback to newspaper3k if trafilatura fails
article.download()
article.parse()
traf = None
else:
traf = trafilatura.extract_metadata(filecontent=source, default_url=url)
article.download()
article.parse()
# Generate title and file paths
title = sanitize_filename(title or article.title or f"Untitled - {dt_datetime.now().strftime('%Y-%m-%d')}")
markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=title, extension=".md")
# Update article properties, preferring trafilatura data when available
article.title = traf.title if traf and traf.title else article.title or url
article.authors = traf.author if traf and traf.author else article.authors or []
article.publish_date = traf.date if traf and traf.date else article.publish_date
try:
article.publish_date = await loc.dt(article.publish_date, "UTC")
except:
logger.debug(f"Failed to localize {article.publish_date}")
article.publish_date = await loc.dt(dt_datetime.now(), "UTC")
article.meta_description = traf.description if traf and traf.description else article.meta_description
article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) if source else article.text
article.top_image = traf.image if traf and traf.image else article.top_image
article.source_url = traf.sitename if traf and traf.sitename else urlparse(url).netloc.replace('www.', '').title()
article.meta_keywords = traf.categories or traf.tags if traf else article.meta_keywords or []
article.meta_keywords = article.meta_keywords if isinstance(article.meta_keywords, list) else [article.meta_keywords]
# Generate summary
summary = await generate_summary(article.text)
if not is_article_within_date_range(article, earliest_date):
return False
# Handle TTS
audio_link = await handle_tts(bg_tasks, article, title, tts_mode, voice, summary)
# Generate markdown content
markdown_content = generate_markdown_content(article, title, summary, audio_link, site_name)
timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
readable_title = sanitize_filename(article.title or timestamp)
markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md")
# Save markdown file
await save_markdown_file(markdown_filename, markdown_content)
summary = await llm.summarize_text(article.text, "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
summary = summary.replace('\n', ' ') # Remove line breaks
if tts_mode == "full" or tts_mode == "content":
tts_text = article.text
elif tts_mode == "summary" or tts_mode == "excerpt":
tts_text = summary
else:
tts_text = None
banner_markdown = ''
try:
banner_url = article.top_image
if banner_url:
banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}.jpg"))
if banner_image:
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
except Exception as e:
logger.error(f"No image found in article")
authors = ', '.join(['[[{}]]'.format(author.strip()) for author in article.authors if author.strip()])
if not authors:
authors = '[[Unknown Author]]'
frontmatter = f"""---
title: {readable_title}
authors: {authors}
published: {article.publish_date}
added: {timestamp}
banner: "{banner_markdown}"
tags:
"""
frontmatter += '\n'.join(f" - {tag}" for tag in article.meta_keywords)
frontmatter += '\n---\n'
body = f"# {readable_title}\n\n"
if tts_text:
audio_filename = f"{article.publish_date.strftime('%Y-%m-%d')} {readable_title}"
try:
audio_path = await tts.generate_speech(
bg_tasks=bg_tasks,
text=tts_text,
voice=voice,
model="xtts2",
podcast=True,
title=audio_filename,
output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR
)
if isinstance(audio_path, Path):
audio_ext = audio_path.suffix
obsidian_link = f"![[{audio_path.name}]]"
body += f"{obsidian_link}\n\n"
else:
logger.warning(f"Unexpected audio_path type: {type(audio_path)}. Value: {audio_path}")
except Exception as e:
logger.error(f"Failed to generate TTS for {audio_filename}. Error: {str(e)}")
logger.error(f"TTS error details - voice: {voice}, model: eleven_turbo_v2, podcast: True")
logger.error(f"Output directory: {Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR}")
body += f"by {authors} in {article.source_url}\n\n"
body += f"> [!summary]+\n"
body += f"> {summary}\n\n"
body += article.text
markdown_content = frontmatter + body
with open(markdown_filename, 'w') as md_file:
md_file.write(markdown_content)
logger.info(f"Successfully saved to {markdown_filename}")
note.add_to_daily_note(relative_path)
print(f"Saved article: {relative_path}")
return True
# Add to daily note
await note.add_to_daily_note(relative_path)
return f"Successfully saved: {relative_path}"
except Exception as e:
logger.error(f"Error processing article from {article.url}: {str(e)}")
err(f"Failed to process article {url}: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
async def fetch_and_parse_article(url: str) -> Article:
    """Download *url* and return a parsed ``Article`` enriched with trafilatura data.

    The page is fetched once with trafilatura and the same HTML is handed to
    newspaper's ``Article`` for parsing. Fields newspaper left empty are then
    filled from trafilatura's metadata; the article text always prefers
    trafilatura's markdown extraction when it yields anything.

    Args:
        url: Address of the article to fetch.

    Returns:
        The parsed ``Article`` with title, authors, publish_date (converted
        via ``gis.dt(..., "UTC")``), text, top_image, source_url and
        meta_keywords populated.

    Raises:
        ValueError: If the page could not be downloaded.
    """
    # fetch_url is a blocking network call; run it in a worker thread so the
    # event loop is not stalled while the page downloads.
    source = await asyncio.to_thread(trafilatura.fetch_url, url)
    if not source:
        # Without HTML neither parser has anything to work on; fail with a
        # clear message instead of an opaque error further down.
        raise ValueError(f"Failed to download article content from {url}")

    # NOTE(review): extract_metadata may return nothing for pages without
    # usable metadata -- every access below therefore goes through _meta().
    traf = trafilatura.extract_metadata(filecontent=source, default_url=url)

    def _meta(field: str):
        """Return trafilatura metadata *field*, or None when unavailable."""
        return getattr(traf, field, None) if traf is not None else None

    article = Article(url)
    article.set_html(source)
    article.parse()

    # Fill gaps in newspaper's result with trafilatura data.
    article.title = article.title or _meta("title") or url

    if not article.authors:
        traf_author = _meta("author")
        if isinstance(traf_author, list):
            candidates = traf_author
        elif traf_author:
            candidates = [traf_author]
        else:
            candidates = []
        # Drop empty entries so downstream ', '.join(...) never sees None.
        article.authors = [author for author in candidates if author]

    article.publish_date = await gis.dt(
        article.publish_date or _meta("date") or dt_datetime.now(), "UTC"
    )
    article.text = (
        trafilatura.extract(source, output_format="markdown", include_comments=False)
        or article.text
    )
    article.top_image = article.top_image or _meta("image")
    article.source_url = _meta("sitename") or urlparse(url).netloc.replace('www.', '').title()
    article.meta_keywords = list(
        set(article.meta_keywords or _meta("categories") or _meta("tags") or [])
    )

    return article
def is_article_within_date_range(article: "Article", days_back: int) -> bool:
    """Return True if *article* was published within the last *days_back* days.

    NOTE: a later definition with the same name in this module takes
    ``(article, earliest_date)`` and replaces this one at import time.

    Args:
        article: Object exposing a ``publish_date`` datetime (or None).
        days_back: How many days before today still count as in range.

    Returns:
        False when the article has no publish date; otherwise whether its
        publish date is on or after today minus *days_back* days.
    """
    published = article.publish_date
    if published is None:
        # Matches the sibling definition: undated articles are out of range
        # rather than an AttributeError.
        return False
    earliest_date = dt_datetime.now().date() - timedelta(days=days_back)
    return published.date() >= earliest_date
async def generate_summary(text: str) -> str:
    """Summarize *text* with the LLM router and return it as a single line.

    Every newline in the model's answer is replaced by a space.
    """
    instruction = "Summarize the provided text. Respond with the summary and nothing else."
    raw_summary = await llm.summarize_text(text, instruction)
    # Splitting on '\n' and re-joining with ' ' swaps each newline for a space.
    return ' '.join(raw_summary.split('\n'))
async def handle_tts(bg_tasks: BackgroundTasks, article: Article, title: str, tts_mode: str, voice: str, summary: str) -> Optional[str]:
    """Generate speech for an article and return an Obsidian embed link to it.

    Args:
        bg_tasks: Background task collection forwarded to the TTS router.
        article: Parsed article; its text and publish_date are read.
        title: Title used to build the audio file name.
        tts_mode: "full"/"content" speaks the article text,
            "summary"/"excerpt" speaks the summary, anything else disables TTS.
        voice: Voice forwarded to the TTS router.
        summary: Summary text spoken in summary/excerpt mode.

    Returns:
        ``![[<audio file name>]]`` on success, or None when TTS is disabled or
        the TTS call raised an HTTPException.
    """
    speak_article = tts_mode in ("full", "content")
    speak_summary = tts_mode in ("summary", "excerpt")
    if not (speak_article or speak_summary):
        return None

    spoken_text = article.text if speak_article else summary
    date_prefix = article.publish_date.strftime('%Y-%m-%d')
    audio_filename = f"{date_prefix} {title}"

    try:
        audio_path = await tts.generate_speech(
            bg_tasks=bg_tasks,
            text=spoken_text,
            voice=voice,
            model="xtts",
            podcast=True,
            title=audio_filename,
            output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR
        )
    except HTTPException as e:
        err(f"Failed to generate TTS: {str(e)}")
        return None
    else:
        # Only the file name is embedded, not the full path.
        return f"![[{Path(audio_path).name}]]"
def generate_markdown_content(article: Article, title: str, summary: str, audio_link: Optional[str], site_name: Optional[str] = None) -> str:
    """Render the note for an article: YAML frontmatter followed by a markdown body.

    Args:
        article: Parsed article; authors, publish_date, top_image,
            meta_keywords, source_url, url and text are read.
        title: Title used in the frontmatter and as the top-level heading.
        summary: Summary placed in a ``[!summary]`` callout.
        audio_link: Optional embed link placed under the heading.
        site_name: Optional site name added to the frontmatter as ``site``.

    Returns:
        The complete note text (frontmatter + body).
    """
    # Ignore empty/None author entries and fall back to a placeholder so the
    # frontmatter and byline are never blank and join() never sees a non-str.
    authors = [str(author).strip() for author in (article.authors or []) if author and str(author).strip()]
    if not authors:
        authors = ["Unknown Author"]

    author_links = ', '.join(f"[[{author}]]" for author in authors)
    added = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
    banner = get_banner_markdown(article.top_image)
    tag_block = '\n'.join(f" - {tag}" for tag in (article.meta_keywords or []))

    frontmatter = '\n'.join([
        "---",
        f"title: {title}",
        f"authors: {author_links}",
        f"published: {article.publish_date}",
        f"added: {added}",
        f'banner: "{banner}"',
        "tags:",
        tag_block,
    ]) + "\n"
    if site_name:
        frontmatter += f"site: {site_name}\n"
    frontmatter += "---\n\n"

    body_parts = [f"# {title}\n\n"]
    if audio_link:
        body_parts.append(f"{audio_link}\n\n")
    body_parts.append(f"by {', '.join(authors)} in [{article.source_url}]({article.url})\n\n")
    body_parts.append(f"> [!summary]+\n> {summary}\n\n")
    body_parts.append(article.text)

    return frontmatter + ''.join(body_parts)
def get_banner_markdown(image_url: str) -> str:
    """Download the banner image and return an Obsidian embed for it.

    Returns an empty string when *image_url* is empty, when ``download_file``
    returns nothing, or when the download raises (the error is logged).
    """
    if image_url:
        try:
            saved_name = download_file(image_url, Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
            if saved_name:
                return f"![[{OBSIDIAN_RESOURCES_DIR}/{saved_name}]]"
        except Exception as e:
            err(f"Failed to download banner image: {str(e)}")
    return ''
async def save_markdown_file(filename: str, content: str):
    """Write *content* to *filename* as UTF-8 text via aiofiles."""
    async with aiofiles.open(filename, 'w', encoding='utf-8') as markdown_file:
        await markdown_file.write(content)
async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "off", voice: str = DEFAULT_11L_VOICE):
    """Fetch one article from a news site and save it if it is recent enough.

    Args:
        article: Object exposing the article's ``url``.
        site_name: Name of the originating site, forwarded to the saver.
        earliest_date: Articles published before this date are skipped.
        bg_tasks: Background task collection forwarded to the saver.
        tts_mode: TTS mode forwarded to the saver.
        voice: TTS voice forwarded to the saver.

    Returns:
        True when the article was saved, False when it was skipped as too old
        or any step failed (failures are logged, never raised).
    """
    # Read the URL once, outside the try, so the error handler below can log
    # it without touching ``article`` again.
    url = getattr(article, "url", None)
    try:
        parsed_article = await fetch_and_parse_article(url)
        if not is_article_within_date_range(parsed_article, earliest_date):
            return False

        # NOTE(review): process_and_save_article fetches the URL again --
        # consider passing parsed_article through to avoid the second download.
        await process_and_save_article(bg_tasks, url, None, tts_mode, voice, site_name=site_name)
        # Return a bool rather than the saver's status string: the gathered
        # results are totalled with sum(), which cannot add strings.
        return True
    except Exception as e:
        err(f"Error processing article from {url}: {str(e)}")
        return False
def is_article_within_date_range(article, earliest_date):
    """Return True if *article* has a publish date on or after *earliest_date*.

    Articles whose ``publish_date`` is None are treated as out of range.
    """
    published = article.publish_date
    if published is None:
        return False
    return published.date() >= earliest_date
async def process_news_site(site, bg_tasks: BackgroundTasks):
print(f"Downloading articles from {site.name}...")
info(f"Downloading articles from {site.name}...")
earliest_date = dt_datetime.now().date() - timedelta(days=site.days_back)
@ -179,11 +198,11 @@ async def process_news_site(site, bg_tasks: BackgroundTasks):
results = await asyncio.gather(*tasks)
articles_downloaded = sum(results)
print(f"Downloaded {articles_downloaded} articles from {site.name}")
info(f"Downloaded {articles_downloaded} articles from {site.name}")
except Exception as e:
print(f"Error processing {site.name}: {str(e)}")
err(f"Error processing {site.name}: {str(e)}")
# Refresh endpoint: processes every site listed in News.sites.
@news.get("/news/refresh")
async def news_refresh_endpoint(bg_tasks: BackgroundTasks):
tasks = [process_news_site(site, bg_tasks) for site in News.sites]
@ -192,32 +211,41 @@ async def news_refresh_endpoint(bg_tasks: BackgroundTasks):
async def generate_path(article, site_name):
publish_date = await loc.dt(article.publish_date, 'UTC') if article.publish_date else await loc.dt(dt_datetime.now(), 'UTC')
publish_date = await gis.dt(article.publish_date, 'UTC') if article.publish_date else await gis.dt(dt_datetime.now(), 'UTC')
title_slug = "".join(c if c.isalnum() else "_" for c in article.title)
filename = f"{site_name} - {title_slug[:50]}.md"
absolute_path, relative_path = assemble_journal_path(publish_date, 'Articles', filename, extension='.md', no_timestamp=True)
return absolute_path, relative_path
async def save_article_to_file(content, output_path):
    """Write *content* to *output_path* as UTF-8, creating parent directories first."""
    target_dir = output_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    async with aiofiles.open(output_path, 'w', encoding='utf-8') as out_file:
        await out_file.write(content)
### CLIPPER ###
@news.post("/clip")
async def clip_post(
bg_tasks: BackgroundTasks,
url: Optional[str] = Form(None),
source: Optional[str] = Form(None),
url: str = Form(...),
title: Optional[str] = Form(None),
tts: str = Form('summary'),
voice: str = Form(DEFAULT_VOICE),
encoding: str = Form('utf-8')
):
markdown_filename = await process_article(bg_tasks, url, title, encoding, source, tts, voice)
return {"message": "Clip saved successfully", "markdown_filename": markdown_filename}
result = await process_and_save_article(bg_tasks, url, title, tts, voice)
return {"message": "Clip saved successfully", "result": result}
@news.get("/clip")
async def clip_get(
    bg_tasks: BackgroundTasks,
    url: str,
    tts: str = Query('summary'),
    voice: str = Query(DEFAULT_VOICE)
):
    """Clip the article at *url* via GET and report the saver's result.

    No title override is supplied (``title=None``); ``tts`` and ``voice`` are
    forwarded to ``process_and_save_article``.
    """
    outcome = await process_and_save_article(
        bg_tasks,
        url,
        title=None,
        tts_mode=tts,
        voice=voice,
    )
    return {"message": "Clip saved successfully", "result": outcome}
@news.post("/archive")
async def archive_post(
@ -229,203 +257,6 @@ async def archive_post(
markdown_filename = await process_archive(url, title, encoding, source)
return {"message": "Clip saved successfully", "markdown_filename": markdown_filename}
@news.get("/clip")
async def clip_get(
bg_tasks: BackgroundTasks,
url: str,
tts: str = Query('summary'),
voice: str = Query(DEFAULT_VOICE)
):
parsed_content = await parse_article(url)
markdown_filename = await process_article2(bg_tasks, parsed_content, tts, voice)
return {"message": "Clip saved successfully", "markdown_filename": markdown_filename}
async def process_article2(
bg_tasks: BackgroundTasks,
parsed_content: Article,
tts_mode: str = "summary",
voice: str = DEFAULT_11L_VOICE
):
timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
readable_title = sanitize_filename(parsed_content.title or timestamp)
markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md")
try:
summary = await llm.summarize_text(parsed_content.clean_doc, "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
summary = summary.replace('\n', ' ') # Remove line breaks
if tts_mode == "full" or tts_mode == "content":
tts_text = parsed_content.clean_doc
elif tts_mode == "summary" or tts_mode == "excerpt":
tts_text = summary
else:
tts_text = None
banner_markdown = ''
try:
banner_url = parsed_content.top_image
if banner_url != '':
banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR))
if banner_image:
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
except Exception as e:
logger.error(f"No image found in article")
authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.authors)
published_date = parsed_content.publish_date
frontmatter = f"""---
title: {readable_title}
authors: {authors}
published: {published_date}
added: {timestamp}
banner: "{banner_markdown}"
tags:
"""
frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.tags)
frontmatter += '\n---\n'
body = f"# {readable_title}\n\n"
if tts_text:
audio_filename = f"{published_date} {readable_title}"
try:
audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename,
output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
audio_ext = Path(audio_path).suffix
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
body += f"{obsidian_link}\n\n"
except Exception as e:
logger.error(f"Failed to generate TTS for np3k. {e}")
try:
body += f"by {authors} in {parsed_content.canonical_link}" # update with method for getting the newspaper name
body += f"> [!summary]+\n"
body += f"> {summary}\n\n"
body += parsed_content["content"]
markdown_content = frontmatter + body
except Exception as e:
logger.error(f"Failed to combine elements of article markdown.")
try:
with open(markdown_filename, 'w') as md_file:
md_file.write(markdown_content)
logger.info(f"Successfully saved to {markdown_filename}")
note.add_to_daily_note(relative_path)
return markdown_filename
except Exception as e:
logger.error(f"Failed to write markdown file")
raise HTTPException(status_code=500, detail=str(e))
except Exception as e:
logger.error(f"Failed to clip: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
async def process_article(
    bg_tasks: BackgroundTasks,
    url: str,
    title: Optional[str] = None,
    encoding: str = 'utf-8',
    source: Optional[str] = None,
    tts_mode: str = "summary",
    voice: str = DEFAULT_11L_VOICE
):
    """Clip the article at ``url`` into a markdown note and link it from the daily note.

    The article is parsed, summarized through ``llm.summarize_text``, optionally
    narrated through ``tts.generate_speech``, and written as a markdown file
    (frontmatter + summary callout + content) at the path produced by
    ``assemble_journal_path`` under the "Articles" subdirectory.

    Args:
        bg_tasks: Background task queue forwarded to ``tts.generate_speech``.
        url: Address of the article; also used for the source link in the note.
        title: Optional title override; falls back to the parsed title, then to
            the current timestamp.
        encoding: Encoding used when writing the markdown file.
        source: Optional pre-fetched page source forwarded to ``parse_article``.
        tts_mode: "full"/"content" narrates the article content,
            "summary"/"excerpt" narrates the summary, anything else disables TTS.
        voice: Voice passed to ``tts.generate_speech``.

    Returns:
        The path of the written markdown file, or ``{"error": ...}`` when
        ``parse_article`` returns ``None``.

    Raises:
        HTTPException: status 500 when summarizing, assembling or writing fails.
    """
    timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')

    # NOTE(review): parsed_content is used as a mapping (.get / ["content"])
    # throughout this function -- confirm parse_article returns a dict-like
    # object for this code path.
    parsed_content = await parse_article(url, source)
    if parsed_content is None:
        return {"error": "Failed to retrieve content"}

    readable_title = sanitize_filename(title or parsed_content.get("title") or timestamp)
    markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md")

    # Built once so the banner and the audio land in the same directory;
    # Path(...) / ... works whether OBSIDIAN_VAULT_DIR is a str or a Path.
    resources_dir = Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR

    try:
        summary = await llm.summarize_text(parsed_content["content"], "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
        summary = summary.replace('\n', ' ')  # Remove line breaks

        if tts_mode in ("full", "content"):
            tts_text = parsed_content["content"]
        elif tts_mode in ("summary", "excerpt"):
            tts_text = summary
        else:
            tts_text = None

        # The banner is best-effort: a failed download must not abort the clip.
        banner_markdown = ''
        try:
            banner_url = parsed_content.get('image', '')
            if banner_url:  # also skips an explicit None, not only ''
                banner_image = download_file(banner_url, resources_dir)
                if banner_image:
                    banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
        except Exception as e:
            logger.error(f"No image found in article: {e}")

        authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown']))

        frontmatter = f"""---
title: {readable_title}
authors: {authors}
published: {parsed_content.get('date_published', 'Unknown')}
added: {timestamp}
excerpt: {parsed_content.get('excerpt', '')}
banner: "{banner_markdown}"
tags:
"""
        frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.get('tags', []))
        frontmatter += '\n---\n'

        body = f"# {readable_title}\n\n"

        # Narration is best-effort as well: the note is still written without audio.
        if tts_text:
            datetime_str = dt_datetime.now().strftime("%Y%m%d%H%M%S")
            audio_filename = f"{datetime_str} {readable_title}"
            try:
                audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename,
                                                       output_dir=resources_dir)
                audio_ext = Path(audio_path).suffix
                obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
                body += f"{obsidian_link}\n\n"
            except Exception as e:
                logger.error(f"Failed to generate TTS for np3k. {e}")

        try:
            body += f"by {authors} in [{parsed_content.get('domain', urlparse(url).netloc.replace('www.', ''))}]({url}).\n\n"
            body += f"> [!summary]+\n"
            body += f"> {summary}\n\n"
            body += parsed_content["content"]
            markdown_content = frontmatter + body
        except Exception as e:
            # Re-raise: continuing would leave markdown_content unbound and
            # surface as an unrelated NameError in the write step below.
            logger.error(f"Failed to combine elements of article markdown: {e}")
            raise

        try:
            with open(markdown_filename, 'w', encoding=encoding) as md_file:
                md_file.write(markdown_content)
            logger.info(f"Successfully saved to {markdown_filename}")
            # add_to_daily_note is a coroutine function; without await it never runs.
            await note.add_to_daily_note(relative_path)
            return markdown_filename
        except Exception as e:
            logger.error(f"Failed to write markdown file for {url}: {e}")
            raise HTTPException(status_code=500, detail=str(e))

    except HTTPException:
        # Already logged and shaped for the client; do not wrap it a second time.
        raise
    except Exception as e:
        logger.error(f"Failed to clip {url}: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
async def parse_article(url: str, source: Optional[str] = None) -> Article:
source = source if source else trafilatura.fetch_url(url)
@ -436,7 +267,7 @@ async def parse_article(url: str, source: Optional[str] = None) -> Article:
article.set_html(source)
article.parse()
logger.info(f"Parsed {article.title}")
info(f"Parsed {article.title}")
# Update or set properties based on trafilatura and additional processing
article.title = article.title or traf.title or url
@ -444,10 +275,10 @@ async def parse_article(url: str, source: Optional[str] = None) -> Article:
article.publish_date = article.publish_date or traf.date
try:
article.publish_date = await loc.dt(article.publish_date, "UTC")
article.publish_date = await gis.dt(article.publish_date, "UTC")
except:
logger.debug(f"Failed to localize {article.publish_date}")
article.publish_date = await loc.dt(dt_datetime.now(), "UTC")
debug(f"Failed to localize {article.publish_date}")
article.publish_date = await gis.dt(dt_datetime.now(), "UTC")
article.meta_description = article.meta_description or traf.description
article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) or article.text
@ -467,7 +298,6 @@ async def parse_article(url: str, source: Optional[str] = None) -> Article:
return article
async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]:
if source:
html_content = source
@ -476,7 +306,7 @@ async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]
async with session.get(url) as response:
html_content = await response.text()
else:
logger.error(f"Unable to convert nothing to markdown.")
err(f"Unable to convert nothing to markdown.")
return None
# Use readability to extract the main content
@ -525,12 +355,13 @@ async def process_archive(
markdown_path.parent.mkdir(parents=True, exist_ok=True)
with open(markdown_path, 'w', encoding=encoding) as md_file:
md_file.write(markdown_content)
logger.debug(f"Successfully saved to {markdown_path}")
debug(f"Successfully saved to {markdown_path}")
return markdown_path
except Exception as e:
logger.warning(f"Failed to write markdown file: {str(e)}")
warn(f"Failed to write markdown file: {str(e)}")
return None
def download_file(url, folder):
os.makedirs(folder, exist_ok=True)
filename = str(uuid.uuid4()) + os.path.splitext(urlparse(url).path)[-1]
@ -552,16 +383,17 @@ def download_file(url, folder):
with open(filepath, 'wb') as f:
f.write(response.content)
else:
logger.error(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}")
err(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}")
return None
else:
logger.error(f"Failed to download image: {url}, status code: {response.status_code}")
err(f"Failed to download image: {url}, status code: {response.status_code}")
return None
except Exception as e:
logger.error(f"Failed to download image: {url}, error: {str(e)}")
err(f"Failed to download image: {url}, error: {str(e)}")
return None
return filename
def copy_file(local_path, folder):
os.makedirs(folder, exist_ok=True)
filename = os.path.basename(local_path)
@ -575,3 +407,188 @@ async def save_file(file: UploadFile, folder: Path) -> Path:
with open(file_path, 'wb') as f:
shutil.copyfileobj(file.file, f)
return file_path
deprecated = '''
async def process_article2(
bg_tasks: BackgroundTasks,
parsed_content: Article,
tts_mode: str = "summary",
voice: str = DEFAULT_11L_VOICE
):
timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
readable_title = sanitize_filename(parsed_content.title or timestamp)
markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md")
try:
summary = await llm.summarize_text(parsed_content.clean_doc, "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
summary = summary.replace('\n', ' ') # Remove line breaks
if tts_mode == "full" or tts_mode == "content":
tts_text = parsed_content.clean_doc
elif tts_mode == "summary" or tts_mode == "excerpt":
tts_text = summary
else:
tts_text = None
banner_markdown = ''
try:
banner_url = parsed_content.top_image
if banner_url != '':
banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR))
if banner_image:
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
except Exception as e:
err(f"No image found in article")
authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.authors)
published_date = parsed_content.publish_date
frontmatter = f"""---
title: {readable_title}
authors: {authors}
published: {published_date}
added: {timestamp}
banner: "{banner_markdown}"
tags:
"""
frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.tags)
frontmatter += '\n---\n'
body = f"# {readable_title}\n\n"
if tts_text:
audio_filename = f"{published_date} {readable_title}"
try:
audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename,
output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
audio_ext = Path(audio_path).suffix
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
body += f"{obsidian_link}\n\n"
except Exception as e:
err(f"Failed to generate TTS for np3k. {e}")
try:
body += f"by {authors} in {parsed_content.canonical_link}" # update with method for getting the newspaper name
body += f"> [!summary]+\n"
body += f"> {summary}\n\n"
body += parsed_content["content"]
markdown_content = frontmatter + body
except Exception as e:
err(f"Failed to combine elements of article markdown.")
try:
with open(markdown_filename, 'w') as md_file:
md_file.write(markdown_content)
info(f"Successfully saved to {markdown_filename}")
await note.add_to_daily_note(relative_path)
return markdown_filename
except Exception as e:
err(f"Failed to write markdown file")
raise HTTPException(status_code=500, detail=str(e))
except Exception as e:
err(f"Failed to clip: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
async def process_article(
bg_tasks: BackgroundTasks,
url: str,
title: Optional[str] = None,
encoding: str = 'utf-8',
source: Optional[str] = None,
tts_mode: str = "summary",
voice: str = DEFAULT_11L_VOICE
):
timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
parsed_content = await parse_article(url, source)
if parsed_content is None:
return {"error": "Failed to retrieve content"}
readable_title = sanitize_filename(title or parsed_content.get("title") or timestamp)
markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md")
try:
summary = await llm.summarize_text(parsed_content["content"], "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
summary = summary.replace('\n', ' ') # Remove line breaks
if tts_mode == "full" or tts_mode == "content":
tts_text = parsed_content["content"]
elif tts_mode == "summary" or tts_mode == "excerpt":
tts_text = summary
else:
tts_text = None
banner_markdown = ''
try:
banner_url = parsed_content.get('image', '')
if banner_url != '':
banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR))
if banner_image:
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
except Exception as e:
err(f"No image found in article")
authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown']))
frontmatter = f"""---
title: {readable_title}
authors: {', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown']))}
published: {parsed_content.get('date_published', 'Unknown')}
added: {timestamp}
excerpt: {parsed_content.get('excerpt', '')}
banner: "{banner_markdown}"
tags:
"""
frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.get('tags', []))
frontmatter += '\n---\n'
body = f"# {readable_title}\n\n"
if tts_text:
datetime_str = dt_datetime.now().strftime("%Y%m%d%H%M%S")
audio_filename = f"{datetime_str} {readable_title}"
try:
audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename,
output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
audio_ext = Path(audio_path).suffix
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
body += f"{obsidian_link}\n\n"
except Exception as e:
err(f"Failed to generate TTS for np3k. {e}")
try:
body += f"by {authors} in [{parsed_content.get('domain', urlparse(url).netloc.replace('www.', ''))}]({url}).\n\n"
body += f"> [!summary]+\n"
body += f"> {summary}\n\n"
body += parsed_content["content"]
markdown_content = frontmatter + body
except Exception as e:
err(f"Failed to combine elements of article markdown.")
try:
with open(markdown_filename, 'w', encoding=encoding) as md_file:
md_file.write(markdown_content)
info(f"Successfully saved to {markdown_filename}")
await note.add_to_daily_note(relative_path)
return markdown_filename
except Exception as e:
err(f"Failed to write markdown file")
raise HTTPException(status_code=500, detail=str(e))
except Exception as e:
err(f"Failed to clip {url}: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
'''

View file

@ -17,24 +17,28 @@ from fastapi import HTTPException, status
from pathlib import Path
from fastapi import APIRouter, Query, HTTPException
from sijapi import API, L, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, GEO
from sijapi.routers import cal, img, loc, tts, llm, time, weather, asr
from sijapi.utilities import assemble_journal_path, assemble_archive_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, check_file_name, HOURLY_COLUMNS_MAPPING
from sijapi.routers import asr, cal, gis, img, llm, serve, time, tts, weather
from sijapi.utilities import assemble_journal_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, check_file_name, HOURLY_COLUMNS_MAPPING
from sijapi.classes import Location
note = APIRouter()
logger = L.get_module_logger("note")
def debug(text: str):
    """Emit ``text`` through the module logger at DEBUG level."""
    logger.debug(text)
def info(text: str):
    """Emit ``text`` through the module logger at INFO level."""
    logger.info(text)
def warn(text: str):
    """Emit ``text`` through the module logger at WARNING level."""
    logger.warning(text)
def err(text: str):
    """Emit ``text`` through the module logger at ERROR level."""
    logger.error(text)
def crit(text: str):
    """Emit ``text`` through the module logger at CRITICAL level."""
    logger.critical(text)
@note.post("/note/add")
async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None), bg_tasks: BackgroundTasks = None):
    """POST endpoint that adds an uploaded file and/or text to the daily note.

    Args:
        file: Optional uploaded file.
        text: Optional text for the entry.
        source: Optional source value, forwarded unchanged to
            ``process_for_daily_note``.
        bg_tasks: Background task queue, forwarded to ``process_for_daily_note``.

    Returns:
        JSONResponse with status 201 carrying a confirmation message and the
        result of ``process_for_daily_note`` under ``"entry"``.

    Raises:
        HTTPException: status 400 when neither ``file`` nor ``text`` is given.
    """
    # Each message is logged once, through the module-level wrappers only.
    debug(f"Received request on /note/add...")

    if not file and not text:
        warn(f"... without any file or text!")
        raise HTTPException(status_code=400, detail="Either text or a file must be provided")

    result = await process_for_daily_note(file, text, source, bg_tasks)
    info(f"Result on /note/add: {result}")
    return JSONResponse({"message": "Note added successfully", "entry": result}, status_code=201)
@ -44,7 +48,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
transcription_entry = ""
file_entry = ""
if file:
logger.debug("File received...")
debug("File received...")
file_content = await file.read()
audio_io = BytesIO(file_content)
@ -52,18 +56,18 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
guessed_type = mimetypes.guess_type(file.filename)
file_type = guessed_type[0] if guessed_type[0] else "application/octet-stream"
logger.debug(f"Processing as {file_type}...")
debug(f"Processing as {file_type}...")
# Extract the main type (e.g., 'audio', 'image', 'video')
main_type = file_type.split('/')[0]
subdir = main_type.title() if main_type else "Documents"
absolute_path, relative_path = assemble_journal_path(now, subdir=subdir, filename=file.filename)
logger.debug(f"Destination path: {absolute_path}")
debug(f"Destination path: {absolute_path}")
with open(absolute_path, 'wb') as f:
f.write(file_content)
logger.debug(f"Processing {f.name}...")
debug(f"Processing {f.name}...")
if main_type == 'audio':
transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6))
@ -74,7 +78,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
file_entry = f"[Source]({relative_path})"
text_entry = text if text else ""
logger.debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}")
debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}")
return await add_to_daily_note(transcription_entry, file_entry, text_entry, now)
@ -169,7 +173,7 @@ added: {timestamp}
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
body += f"{obsidian_link}\n\n"
except Exception as e:
logger.error(f"Failed in the TTS portion of clipping: {e}")
err(f"Failed in the TTS portion of clipping: {e}")
body += f"> [!summary]+\n"
body += f"> {summary}\n\n"
@ -182,12 +186,12 @@ added: {timestamp}
with open(markdown_filename, 'w', encoding=encoding) as md_file:
md_file.write(markdown_content)
logger.info(f"Successfully saved to {markdown_filename}")
info(f"Successfully saved to {markdown_filename}")
return markdown_filename
except Exception as e:
logger.error(f"Failed to clip: {str(e)}")
err(f"Failed to clip: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
def list_and_correct_impermissible_files(root_dir, rename: bool = False):
@ -198,7 +202,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
if check_file_name(filename):
file_path = Path(dirpath) / filename
impermissible_files.append(file_path)
logger.debug(f"Impermissible file found: {file_path}")
debug(f"Impermissible file found: {file_path}")
# Sanitize the file name
new_filename = sanitize_filename(filename)
@ -216,7 +220,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
# Rename the file
if rename:
os.rename(file_path, new_file_path)
logger.debug(f"Renamed: {file_path} -> {new_file_path}")
debug(f"Renamed: {file_path} -> {new_file_path}")
return impermissible_files
@ -233,7 +237,7 @@ async def build_daily_note_range_endpoint(dt_start: str, dt_end: str):
results = []
current_date = start_date
while current_date <= end_date:
formatted_date = await loc.dt(current_date)
formatted_date = await gis.dt(current_date)
result = await build_daily_note(formatted_date)
results.append(result)
current_date += timedelta(days=1)
@ -242,6 +246,37 @@ async def build_daily_note_range_endpoint(dt_start: str, dt_end: str):
@note.get("/note/create")
async def build_daily_note_getpoint():
    """GET endpoint that builds the daily note for "now" at the last known location.

    The last location comes from ``gis.get_last_location()`` and its timezone
    from ``GEO.tz_current()``; ``build_daily_note`` is then called with the
    current time in that timezone and the location's latitude/longitude.

    Returns:
        JSONResponse with status 200 and the note path under ``"path"``.

    Raises:
        HTTPException: status 400 for any ValueError (including a missing
            location or timezone), status 500 for every other exception.
    """
    try:
        last_location = await gis.get_last_location()
        if not last_location:
            raise ValueError("Unable to retrieve last location")

        local_tz = await GEO.tz_current(last_location)
        if not local_tz:
            raise ValueError(f"Unable to determine timezone for location: {last_location}")

        note_path = await build_daily_note(
            dt_datetime.now(local_tz),
            last_location.latitude,
            last_location.longitude,
        )
        note_path_text = str(note_path)

        info(f"Successfully created daily note at {note_path_text}")
        return JSONResponse(content={"path": note_path_text}, status_code=200)

    except ValueError as value_error:
        message = f"Value Error in build_daily_note_getpoint: {str(value_error)}"
        err(message)
        raise HTTPException(status_code=400, detail=message)

    except Exception as unexpected:
        message = f"Unexpected error in build_daily_note_getpoint: {str(unexpected)}"
        err(message)
        # The client gets a generic message; the traceback goes to the log only.
        err(f"Traceback: {traceback.format_exc()}")
        raise HTTPException(status_code=500, detail="An unexpected error occurred")
@note.post("/note/create")
async def build_daily_note_endpoint(
date_str: Optional[str] = Form(dt_datetime.now().strftime("%Y-%m-%d")),
@ -258,10 +293,10 @@ async def build_daily_note_endpoint(
else:
raise ValueError("Location is not provided or invalid.")
except (ValueError, AttributeError, TypeError) as e:
logger.warning(f"Falling back to localized datetime due to error: {e}")
warn(f"Falling back to localized datetime due to error: {e}")
try:
date_time = await loc.dt(date_str)
places = await loc.fetch_locations(date_time)
date_time = await gis.dt(date_str)
places = await gis.fetch_locations(date_time)
lat, lon = places[0].latitude, places[0].longitude
except Exception as e:
return JSONResponse(content={"error": str(e)}, status_code=400)
@ -278,14 +313,14 @@ async def build_daily_note(date_time: dt_datetime, lat: float = None, lon: float
Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses the sijapi configuration file to place the daily note and does NOT presently interface with Obsidian's daily note or periodic notes extensions. It is your responsibility to ensure they match.
'''
absolute_path, _ = assemble_journal_path(date_time)
logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.")
warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.")
formatted_day = date_time.strftime("%A %B %d, %Y") # Monday May 27, 2024 formatting
day_before = (date_time - timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-26 Sunday formatting
day_after = (date_time + timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-28 Tuesday formatting
header = f"# [[{day_before}|← ]] {formatted_day} [[{day_after}| →]]\n\n"
if not lat or not lon:
places = await loc.fetch_locations(date_time)
places = await gis.fetch_locations(date_time)
lat, lon = places[0].latitude, places[0].longitude
location = await GEO.code((lat, lon))
@ -308,6 +343,10 @@ Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses th
_, note_path = assemble_journal_path(date_time, filename="Notes", extension=".md", no_timestamp = True)
note_embed = f"![[{note_path}]]"
_, map_path = assemble_journal_path(date_time, filename="Map", extension=".png", no_timestamp = True)
map = await gis.generate_and_save_heatmap(date_time, output_path=map_path)
map_embed = f"![[{map_path}]]"
_, banner_path = assemble_journal_path(date_time, filename="Banner", extension=".jpg", no_timestamp = True)
body = f"""---
@ -320,6 +359,7 @@ created: "{dt_datetime.now().strftime("%Y-%m-%d %H:%M:%S")}"
{header}
{weather_embed}
{map_path}
## Events
{event_embed}
@ -369,7 +409,7 @@ async def update_frontmatter(date_time: dt_datetime, key: str, value: str):
# Check if the file exists
if not file_path.exists():
logger.critical(f"Markdown file not found at {file_path}")
crit(f"Markdown file not found at {file_path}")
raise HTTPException(status_code=404, detail="Markdown file not found.")
# Read the file
@ -416,32 +456,29 @@ async def banner_endpoint(dt: str, location: str = None, forecast: str = None, m
'''
Endpoint (POST) that generates a new banner image for the Obsidian daily note for a specified date, taking into account optional additional information, then updates the frontmatter if necessary.
'''
logger.debug(f"banner_endpoint requested with date: {dt} ({type(dt)})")
date_time = await loc.dt(dt)
logger.debug(f"date_time after localization: {date_time} ({type(date_time)})")
debug(f"banner_endpoint requested with date: {dt} ({type(dt)})")
date_time = await gis.dt(dt)
debug(f"date_time after localization: {date_time} ({type(date_time)})")
context = await generate_context(dt, location, forecast, mood, other_context)
jpg_path = await generate_banner(date_time, location, mood=mood, other_context=other_context)
return jpg_path
async def generate_banner(dt, location: Location = None, forecast: str = None, mood: str = None, other_context: str = None):
    """Generate the banner image for a daily note and record it in the note's frontmatter.

    Args:
        dt: Date of the note; normalized through ``gis.dt``.
        location: Optional ``Location``. Anything that is not a ``Location``
            instance is replaced by the first result of
            ``gis.fetch_locations`` when one is available.
        forecast: Optional forecast text; when missing it is obtained from
            ``update_dn_weather``.
        mood: Optional mood, forwarded to ``generate_context``.
        other_context: Optional extra context, forwarded to ``generate_context``.

    Returns:
        The second value returned by ``assemble_journal_path`` for the banner
        (bound to ``local_path``), which is also the path embedded in the
        frontmatter.
    """
    date_time = await gis.dt(dt)
    debug(f"generate_banner called with date_time: {date_time}")

    destination_path, local_path = assemble_journal_path(date_time, filename="Banner", extension=".jpg", no_timestamp = True)
    debug(f"destination path generated: {destination_path}")

    if not location or not isinstance(location, Location):
        locations = await gis.fetch_locations(date_time)
        if locations:
            location = locations[0]

    if not forecast:
        # location may still be None or a non-Location value when no location
        # was found; update_dn_weather looks the coordinates up itself when
        # lat/lon are falsy, so pass None instead of raising AttributeError.
        latitude = getattr(location, "latitude", None)
        longitude = getattr(location, "longitude", None)
        forecast = await update_dn_weather(date_time, False, latitude, longitude)

    prompt = await generate_context(date_time, location, forecast, mood, other_context)
    debug(f"Prompt: {prompt}")

    final_path = await img.workflow(prompt, scene=OBSIDIAN_BANNER_SCENE, destination_path=destination_path)
    if str(local_path) not in str(final_path):
        info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}")

    jpg_embed = f"\"![[{local_path}]]\""
    await update_frontmatter(date_time, "banner", jpg_embed)
    return local_path
@ -469,7 +506,7 @@ async def generate_context(date_time, location: Location, forecast: str, mood: s
if geocoded_location.display_name or geocoded_location.city or geocoded_location.country:
return await generate_context(date_time, geocoded_location, forecast, mood, other_context)
else:
logger.warning(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.")
warn(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.")
elif location and isinstance(location, str):
display_name = f"Location: {location}\n"
else:
@ -507,7 +544,7 @@ async def generate_context(date_time, location: Location, forecast: str, mood: s
async def get_note(date_time: dt_datetime):
date_time = await loc.dt(date_time);
date_time = await gis.dt(date_time);
absolute_path, local_path = assemble_journal_path(date_time, filename = "Notes", extension = ".md", no_timestamp = True)
if absolute_path.is_file():
@ -536,9 +573,9 @@ async def note_weather_get(
):
force_refresh_weather = refresh == "True"
try:
date_time = dt_datetime.now() if date == "0" else await loc.dt(date)
logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.")
logger.debug(f"date: {date} .. date_time: {date_time}")
date_time = dt_datetime.now() if date == "0" else await gis.dt(date)
warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.")
debug(f"date: {date} .. date_time: {date_time}")
content = await update_dn_weather(date_time, force_refresh_weather) #, lat, lon)
return JSONResponse(content={"forecast": content}, status_code=200)
@ -546,14 +583,14 @@ async def note_weather_get(
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
except Exception as e:
logger.error(f"Error in note_weather_get: {str(e)}")
err(f"Error in note_weather_get: {str(e)}")
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
@note.post("/update/note/{date}")
async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refresh: str="False") -> PlainTextResponse:
date_time = await loc.dt(date)
logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.")
date_time = await gis.dt(date)
warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.")
force_refresh_weather = refresh == "True"
await update_dn_weather(date_time, force_refresh_weather)
await update_daily_note_events(date_time)
@ -561,52 +598,52 @@ async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refres
return f"[Refresh]({API.URL}/update/note/{date_time.strftime('%Y-%m-%d')}"
async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False, lat: float = None, lon: float = None):
logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
try:
if lat and lon:
place = await GEO.code((lat, lon))
else:
logger.debug(f"Updating weather for {date_time}")
places = await loc.fetch_locations(date_time)
debug(f"Updating weather for {date_time}")
places = await gis.fetch_locations(date_time)
place = places[0]
lat = place.latitude
lon = place.longitude
logger.debug(f"lat: {lat}, lon: {lon}, place: {place}")
debug(f"lat: {lat}, lon: {lon}, place: {place}")
city = GEO.find_override_location(lat, lon)
if city:
logger.info(f"Using override location: {city}")
info(f"Using override location: {city}")
else:
if place.city and place.city != "":
city = place.city
logger.info(f"City in data: {city}")
info(f"City in data: {city}")
else:
location = await GEO.code((lat, lon))
logger.debug(f"location: {location}")
debug(f"location: {location}")
city = location.name
city = city if city else location.city
city = city if city else location.house_number + ' ' + location.road
logger.debug(f"City geocoded: {city}")
debug(f"City geocoded: {city}")
# Assemble journal path
absolute_path, relative_path = assemble_journal_path(date_time, filename="Weather", extension=".md", no_timestamp = True)
logger.debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}")
debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}")
try:
logger.debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
day = await weather.get_weather(date_time, lat, lon, force_refresh)
logger.debug(f"day information obtained from get_weather: {day}")
debug(f"day information obtained from get_weather: {day}")
if day:
DailyWeather = day.get('DailyWeather')
HourlyWeather = day.get('HourlyWeather')
if DailyWeather:
# logger.debug(f"Day: {DailyWeather}")
# debug(f"Day: {DailyWeather}")
icon = DailyWeather.get('icon')
logger.debug(f"Icon: {icon}")
debug(f"Icon: {icon}")
weather_icon, admonition = get_icon_and_admonition(icon) if icon else (":LiSunMoon:", "ad-weather")
@ -675,38 +712,38 @@ async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False,
detailed_forecast += assemble_hourly_data_table(times, condition_symbols, temps, winds)
detailed_forecast += f"```\n\n"
logger.debug(f"Detailed forecast: {detailed_forecast}.")
debug(f"Detailed forecast: {detailed_forecast}.")
with open(absolute_path, 'w', encoding='utf-8') as note_file:
note_file.write(detailed_forecast)
logger.debug(f"Operation complete.")
debug(f"Operation complete.")
return narrative
else:
logger.error(f"Failed to get DailyWeather from day: {day}")
err(f"Failed to get DailyWeather from day: {day}")
else:
logger.error(f"Failed to get day")
err(f"Failed to get day")
raise HTTPException(status_code=500, detail="Failed to retrieve weather data")
except HTTPException as e:
logger.error(f"HTTP error: {e}")
logger.error(traceback.format_exc())
err(f"HTTP error: {e}")
err(traceback.format_exc())
raise e
except Exception as e:
logger.error(f"Error: {e}")
logger.error(traceback.format_exc())
err(f"Error: {e}")
err(traceback.format_exc())
raise HTTPException(status_code=999, detail=f"Error: {e}")
except ValueError as ve:
logger.error(f"Value error in update_dn_weather: {str(ve)}")
logger.error(traceback.format_exc())
err(f"Value error in update_dn_weather: {str(ve)}")
err(traceback.format_exc())
raise HTTPException(status_code=400, detail=f"Value error: {str(ve)}")
except Exception as e:
logger.error(f"Error in update_dn_weather: {str(e)}")
logger.error(traceback.format_exc())
err(f"Error in update_dn_weather: {str(e)}")
err(traceback.format_exc())
raise HTTPException(status_code=500, detail=f"Error in update_dn_weather: {str(e)}")
def format_hourly_time(hour):
@ -714,8 +751,8 @@ def format_hourly_time(hour):
hour_12 = convert_to_12_hour_format(hour.get("datetime"))
return hour_12
except Exception as e:
logger.error(f"Error in format_hourly_time: {str(e)}")
logger.error(traceback.format_exc())
err(f"Error in format_hourly_time: {str(e)}")
err(traceback.format_exc())
return ""
def format_hourly_icon(hour, sunrise, sunset):
@ -725,7 +762,7 @@ def format_hourly_icon(hour, sunrise, sunset):
precip = hour.get('precip', float(0.0))
precip_prob = hour.get('precipprob', float(0.0))
logger.debug(f"precip: {precip}, prob: {precip_prob}")
debug(f"precip: {precip}, prob: {precip_prob}")
sp_str = None
@ -749,8 +786,8 @@ def format_hourly_icon(hour, sunrise, sunset):
return formatted
except Exception as e:
logger.error(f"Error in format_hourly_special: {str(e)}")
logger.error(traceback.format_exc())
err(f"Error in format_hourly_special: {str(e)}")
err(traceback.format_exc())
return ""
def format_hourly_temperature(hour):
@ -758,8 +795,8 @@ def format_hourly_temperature(hour):
temp_str = f"{hour.get('temp', '')}˚ F"
return temp_str
except Exception as e:
logger.error(f"Error in format_hourly_temperature: {str(e)}")
logger.error(traceback.format_exc())
err(f"Error in format_hourly_temperature: {str(e)}")
err(traceback.format_exc())
return ""
def format_hourly_wind(hour):
@ -769,8 +806,8 @@ def format_hourly_wind(hour):
wind_str = f"{str(windspeed)}:LiWind: {winddir}"
return wind_str
except Exception as e:
logger.error(f"Error in format_hourly_wind: {str(e)}")
logger.error(traceback.format_exc())
err(f"Error in format_hourly_wind: {str(e)}")
err(traceback.format_exc())
return ""
def assemble_hourly_data_table(times, condition_symbols, temps, winds):
@ -783,7 +820,7 @@ def assemble_hourly_data_table(times, condition_symbols, temps, winds):
def get_icon_and_admonition(icon_str) -> Tuple:
logger.debug(f"Received request for emoji {icon_str}")
debug(f"Received request for emoji {icon_str}")
if icon_str.startswith(":") and icon_str.endswith(":"):
return icon_str
@ -884,7 +921,7 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s
total_events = len(event_data["events"])
event_markdown = f"```ad-events"
for event in event_data["events"]:
logger.debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}")
debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}")
if not event['name'].startswith('TC '):
url = f"hook://ical/eventID={event['uid']}calendarID=17"
if event['url']:
@ -957,23 +994,23 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s
@note.get("/note/events", response_class=PlainTextResponse)
async def note_events_endpoint(date: str = Query(None)):
date_time = await loc.dt(date) if date else await loc.dt(dt_datetime.now())
date_time = await gis.dt(date) if date else await gis.dt(dt_datetime.now())
response = await update_daily_note_events(date_time)
return PlainTextResponse(content=response, status_code=200)
async def update_daily_note_events(date_time: dt_datetime):
logger.debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}")
debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}")
try:
events = await cal.get_events(date_time, date_time)
logger.debug(f"Raw events: {events}")
debug(f"Raw events: {events}")
event_data = {
"date": date_time.strftime('%Y-%m-%d'),
"events": events
}
events_markdown = await format_events_as_markdown(event_data)
logger.debug(f"Markdown events: {events_markdown}")
debug(f"Markdown events: {events_markdown}")
absolute_path, _ = assemble_journal_path(date_time, filename="Events", extension=".md", no_timestamp = True)
logger.debug(f"Writing events to file: {absolute_path}")
debug(f"Writing events to file: {absolute_path}")
with open(absolute_path, 'w', encoding='utf-8') as note_file:
note_file.write(events_markdown)
@ -981,7 +1018,7 @@ async def update_daily_note_events(date_time: dt_datetime):
return events_markdown
except Exception as e:
logger.error(f"Error processing events: {e}")
err(f"Error processing events: {e}")
raise HTTPException(status_code=500, detail=str(e))

View file

@ -8,6 +8,11 @@ from sijapi import L
rag = APIRouter()
logger = L.get_module_logger("rag")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
rag.get("/rag/search")
async def rag_search_endpoint(query: str, scope: str):

177
sijapi/routers/scrape.py Normal file
View file

@ -0,0 +1,177 @@
import asyncio
import json
import re
from fastapi import APIRouter, HTTPException
from typing import Dict, List, Any
import aiohttp
import PyPDF2
import io
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from pathlib import Path
from sijapi import Scrape, L, Dir
logger = L.get_module_logger('scrape')
def debug(text: str):
    """Emit *text* on this module's logger at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Emit *text* on this module's logger at INFO level."""
    logger.info(text)


def warn(text: str):
    """Emit *text* on this module's logger at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Emit *text* on this module's logger at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Emit *text* on this module's logger at CRITICAL level."""
    logger.critical(text)
scrape = APIRouter()
# Ensure Dir.DATA is a Path object
Dir.DATA = Path(Dir.DATA).expanduser()
def save_to_json(data: List[Dict], output_file: str):
    """Write *data* as indented JSON to ``Dir.DATA / output_file``.

    Parent directories of the destination are created when missing.
    """
    destination = Dir.DATA / output_file
    info(f"Saving data to {destination}")

    # Make sure the target folder exists before opening the file.
    destination.parent.mkdir(parents=True, exist_ok=True)

    with open(destination, 'w') as handle:
        json.dump(data, handle, indent=2)

    info(f"Data saved successfully to {destination}")
def load_from_json(output_file: str) -> List[Dict]:
    """Read previously saved JSON from ``Dir.DATA / output_file``.

    Returns:
        The decoded JSON content, or an empty list when the file does not
        exist (a warning is logged in that case).
    """
    source = Dir.DATA / output_file
    info(f"Loading data from {source}")

    try:
        handle = open(source, 'r')
    except FileNotFoundError:
        warn(f"File {source} not found")
        return []

    with handle:
        return json.load(handle)
async def fetch_content(config: Any) -> str:
    """Download the resource at ``config.url`` and decode it per ``config.content``.

    When ``config.content.js_render`` is truthy the page is rendered through
    Selenium. Otherwise the URL is fetched with aiohttp and handed to the
    handler selected by ``config.content.type`` ('pdf', 'html'/'xml', 'json'
    or 'txt'). Any other type is logged as unsupported and the raw response
    text is returned.
    """
    info(f"Fetching content from {config.url}")

    if config.content.js_render:
        return await fetch_with_selenium(config.url)

    async with aiohttp.ClientSession() as session:
        async with session.get(config.url) as response:
            kind = config.content.type

            if kind == 'pdf':
                return await handle_pdf(response)
            if kind in ('html', 'xml'):
                return await handle_html_xml(response, config.content.selector)
            if kind == 'json':
                return await handle_json(response)

            # 'txt' and unknown types both fall back to the plain body;
            # only the unknown ones are worth a warning.
            if kind != 'txt':
                warn(f"Unsupported content type: {kind}")
            return await response.text()
async def fetch_with_selenium(url: str) -> str:
    """Render *url* in headless Chrome and return the resulting page source.

    The Selenium calls are blocking, so they run in a worker thread instead of
    stalling the event loop. The browser is always shut down, even when
    loading the page raises.
    """
    def _render() -> str:
        options = Options()
        options.add_argument("--headless")
        driver = webdriver.Chrome(options=options)
        try:
            driver.get(url)
            return driver.page_source
        finally:
            # Without this, a failed page load would leave a Chrome process behind.
            driver.quit()

    return await asyncio.to_thread(_render)
async def handle_pdf(response):
    """Read the response body as a PDF and return the text of all pages.

    Page texts are joined with newlines, in page order.
    """
    raw_bytes = await response.read()
    reader = PyPDF2.PdfReader(io.BytesIO(raw_bytes))
    page_texts = [page.extract_text() for page in reader.pages]
    return "\n".join(page_texts)
async def handle_html_xml(response, selector):
    """Return the text content of an HTML/XML response.

    Args:
        response: object exposing an awaitable ``text()`` with the markup.
        selector: optional CSS selector; when given, only the text of the
            first matching element is returned.

    Raises:
        ValueError: if *selector* is given but matches no element.
    """
    markup = await response.text()
    soup = BeautifulSoup(markup, 'html.parser')

    if not selector:
        return soup.get_text()

    element = soup.select_one(selector)
    if element is None:
        # select_one() yields None on no match; fail with a message that
        # names the selector rather than an AttributeError on NoneType.
        raise ValueError(f"Selector {selector!r} matched no element in the fetched document")
    return element.get_text()
async def handle_json(response):
    """Return the decoded JSON body of *response*."""
    payload = await response.json()
    return payload
def apply_processing_step(data: Any, step: Any) -> Any:
    """Apply one configured processing step to *data* and return the result.

    Supported ``step.type`` values:
      * 'regex_split'    - split *data* on ``step.pattern`` and drop the first piece
      * 'keyword_filter' - keep items whose string form contains any of
                           ``step.keywords`` (case-insensitive)
      * 'regex_extract'  - run ``apply_regex_extract`` on *data*, or on each
                           item when *data* is a list
    Any other type is logged at debug level and *data* is returned unchanged.
    """
    info(f"Applying processing step: {step.type}")
    kind = step.type

    if kind == 'regex_split':
        pieces = re.split(step.pattern, data)
        return pieces[1:]

    if kind == 'keyword_filter':
        kept = []
        for item in data:
            if any(keyword.lower() in str(item).lower() for keyword in step.keywords):
                kept.append(item)
        return kept

    if kind == 'regex_extract':
        if not isinstance(data, list):
            return apply_regex_extract(data, step.extractions)
        return [apply_regex_extract(item, step.extractions) for item in data]

    debug(f"Unknown processing step type: {step.type}")
    return data
def apply_regex_extract(text: str, extractions: List[Any]) -> Dict:
    """Run each configured extraction against *text* and collect the results.

    Each extraction is a mapping (or an object exposing ``.dict()``) with:
      * 'name'        - key under which the result is stored
      * 'pattern'     - regular expression passed to ``re.findall``
      * 'flags'       - optional list of ``re`` flag names (e.g. 'DOTALL')
      * 'all_matches' - when true store every match, otherwise only the last
      * 'group_names' - optional names zipped with the capture groups

    Returns:
        Dict mapping extraction names to their values. Extractions without
        any match are omitted.
    """
    debug(f"Applying regex extraction on text of length {len(text)}")
    result = {}

    for extraction in extractions:
        spec = extraction.dict() if hasattr(extraction, 'dict') else extraction

        # OR the flags together: summing breaks when a flag is listed twice,
        # and `or []` tolerates an explicit None.
        flags = 0
        for flag_name in spec.get('flags') or []:
            flags |= getattr(re, flag_name.upper())

        matches = re.findall(spec['pattern'], text, flags=flags)
        if not matches:
            continue

        name = spec['name']
        group_names = spec.get('group_names')

        if spec.get('all_matches', False):
            if group_names:
                # findall yields plain strings for zero/one-group patterns;
                # wrap them so zip pairs whole values, not characters.
                result[name] = [
                    dict(zip(group_names, match if isinstance(match, tuple) else (match,)))
                    for match in matches
                ]
            else:
                result[name] = matches
            continue

        last = matches[-1]  # Take the last match
        if isinstance(last, tuple):
            # Multi-group pattern: strip each group instead of calling
            # .strip() on the tuple (which raised AttributeError).
            stripped = [group.strip() for group in last]
            result[name] = dict(zip(group_names, stripped)) if group_names else stripped
        else:
            result[name] = last.strip()

    debug(f"Extracted {len(result)} items")
    return result
def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]:
    """Run 'custom' post-processing steps by looking up module-level functions.

    For every step whose ``type`` is 'custom', the function named by
    ``step.function`` is fetched from this module's globals and called with
    the current data; its return value becomes the new data.

    NOTE: this module defines ``apply_post_processing`` a second time further
    down; that later definition replaces this one when the module is imported.
    """
    info("Applying post-processing steps")
    for step in post_processing:
        if step.type != 'custom':
            continue
        handler = globals()[step.function]
        data = handler(data)
    return data
def data_has_changed(new_data: List[Dict], old_data: List[Dict]) -> bool:
    """Tell whether the freshly scraped data differs from the stored data."""
    differs = new_data != old_data
    return differs
@scrape.get("/scrape/{config_name}")
async def scrape_site(config_name: str):
    """Run the scrape configuration called *config_name* and persist changes.

    The content is fetched, passed through the configured processing and
    post-processing steps, and compared with the previously saved JSON. The
    output file is rewritten only when the data differs.

    Raises:
        HTTPException: 404 when no configuration has the requested name.
    """
    info(f"Starting scrape operation for {config_name}")

    if hasattr(Scrape, 'configurations'):
        config = next((c for c in Scrape.configurations if c.name == config_name), None)
    else:
        # If 'configurations' doesn't exist, assume the entire Scrape object is the configuration
        config = Scrape if Scrape.name == config_name else None

    if not config:
        raise HTTPException(status_code=404, detail=f"Configuration '{config_name}' not found")

    processed_data = await fetch_content(config)
    for step in config.processing:
        processed_data = apply_processing_step(processed_data, step)
    processed_data = apply_post_processing(processed_data, config.post_processing)

    # Resolve Dir.DATA in the output file path
    output_file = config.output_file.replace('{{ Dir.DATA }}', str(Dir.DATA))
    previous_data = load_from_json(output_file)

    if not data_has_changed(processed_data, previous_data):
        info("Scrape completed with no updates")
        return {"message": "No updates", "data": processed_data}

    save_to_json(processed_data, output_file)
    info("Scrape completed with updates")
    return {"message": "Site updated", "data": processed_data}
def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]:
    """Apply the configured post-processing steps to *data*.

    Supported ``step.type`` values:
      * 'regex_extract' - for every entry containing ``step.field``, run
        ``step.pattern`` over that field and store the formatted result in
        ``step.output_field``: a list of all matches when
        ``step.all_matches`` is true, otherwise the first match (the entry is
        left untouched if there is none).
      * 'custom' - call the module-level function named by ``step.function``
        with the current data and continue with its return value. An earlier
        definition of this function in the module handled only this type and
        is shadowed by this one, so it is handled here as well.

    Returns:
        The (possibly mutated in place) data.
    """
    info("Applying post-processing steps")

    for step in post_processing or []:
        if step.type == 'custom':
            handler = globals().get(getattr(step, 'function', None))
            if callable(handler):
                data = handler(data)
            else:
                warn(f"Custom post-processing function not found: {getattr(step, 'function', None)}")
        elif step.type == 'regex_extract':
            for entry in data:
                if step.field not in entry:
                    continue
                found = re.findall(step.pattern, entry[step.field])
                # findall returns bare strings for patterns with zero or one
                # group; wrap them so format(*groups) receives the whole
                # match instead of its individual characters.
                groups = [m if isinstance(m, tuple) else (m,) for m in found]
                if step.all_matches:
                    entry[step.output_field] = [step.format.format(*g) for g in groups]
                elif groups:
                    entry[step.output_field] = step.format.format(*groups[0])

    return data

View file

@ -28,16 +28,22 @@ from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from sijapi import (
L, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
L, API, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR,
MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, DATA_DIR, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
)
from sijapi.classes import WidgetUpdate
from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
from sijapi.routers import loc, note
from sijapi.routers import gis
serve = APIRouter(tags=["public"])
logger = L.get_module_logger("serve")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
# Log *text* at ERROR level on this module's logger.
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
@serve.get("/pgp")
async def get_pgp():
@ -51,7 +57,6 @@ def serve_image(image_name: str):
else:
return {"error": "Image not found"}
def construct_journal_path(date_str: str) -> Path:
try:
date_obj = datetime.strptime(date_str, '%Y-%m-%d')
@ -67,19 +72,20 @@ def is_valid_date(date_str: str) -> bool:
except ValueError:
return False
@serve.get("/notes/{file_path:path}")
async def get_file_endpoint(file_path: str):
try:
date_time = await loc.dt(file_path);
date_time = await gis.dt(file_path);
absolute_path, local_path = assemble_journal_path(date_time, no_timestamp = True)
except ValueError as e:
logger.debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path")
debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path")
absolute_path = OBSIDIAN_VAULT_DIR / file_path
if not absolute_path.suffix:
absolute_path = Path(absolute_path.with_suffix(".md"))
if not absolute_path.is_file():
logger.warning(f"{absolute_path} is not a valid file it seems.")
warn(f"{absolute_path} is not a valid file it seems.")
elif absolute_path.suffix == '.md':
try:
with open(absolute_path, 'r', encoding='utf-8') as file:
@ -93,19 +99,6 @@ async def get_file_endpoint(file_path: str):
raise HTTPException(status_code=400, detail="Unsupported file type")
with open(CASETABLE_PATH, 'r') as file:
CASETABLE = json.load(file)
class WidgetUpdate(BaseModel):
text: Optional[str] = None
progress: Optional[str] = None
icon: Optional[str] = None
color: Optional[str] = None
url: Optional[str] = None
shortcut: Optional[str] = None
graph: Optional[str] = None
@serve.get("/health_check")
def hook_health():
shellfish_health_check()
@ -130,50 +123,33 @@ async def hook_changedetection(webhook_data: dict):
if message and any(word in message.split() for word in ["SPI", "sierra", "pacific"]):
filename = ALERTS_DIR / f"alert_{int(time.time())}.json"
filename.write_text(json.dumps(webhook_data, indent=4))
notify(message)
return {"status": "received"}
@serve.post("/cl/search")
async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks):
client_ip = request.client.host
logger.debug(f"Received request from IP: {client_ip}")
data = await request.json()
payload = data['payload']
results = data['payload']['results']
# Save the payload data
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_search.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
bg_tasks.add_task(cl_search_process_result, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
@serve.post("/cl/docket")
async def hook_cl_docket(request: Request):
client_ip = request.client.host
logger.debug(f"Received request from IP: {client_ip}")
data = await request.json()
await cl_docket(data, client_ip)
async def notify(alert: str):
fail = True
try:
await notify_shellfish(alert)
if API.EXTENSIONS.shellfish == "on" or API.EXTENSIONS.shellfish == True:
await notify_shellfish(alert)
fail = False
if TS_ID == MAC_ID:
await notify_local(alert)
else:
await notify_remote(f"{MAC_ID}.{TS_TAILNET}.net", alert, MAC_UN, MAC_PW)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to send alert: {str(e)}")
if API.EXTENSIONS.macnotify == "on" or API.EXTENSIONS.macnotify == True:
if TS_ID == MAC_ID:
await notify_local(alert)
fail = False
else:
await notify_remote(f"{MAC_ID}.{TS_TAILNET}.net", alert, MAC_UN, MAC_PW)
fail = False
except:
fail = True
return {"message": alert}
if fail == False:
info(f"Delivered alert: {alert}")
return {"message": alert}
else:
crit(f"Failed to deliver alert: {alert}")
return {"message": f"Failed to deliver alert: {alert}"}
async def notify_local(message: str):
    """Show *message* as a local macOS notification through ``osascript``.

    The message can originate from webhook payloads, so it is never passed
    through a shell: it is escaped for an AppleScript string literal and
    handed to ``osascript`` as a single argument. Failure to launch
    ``osascript`` is logged rather than raised.
    """
    # Backslash and double quote are the characters that would terminate or
    # alter an AppleScript double-quoted string.
    escaped = message.replace('\\', '\\\\').replace('"', '\\"')
    script = f'display notification "{escaped}" with title "Notification Title"'
    try:
        await asyncio.to_thread(subprocess.run, ["osascript", "-e", script], check=False)
    except OSError as exc:
        logger.error(f"Unable to run osascript for local notification: {exc}")
@ -194,228 +170,256 @@ async def notify_remote(host: str, message: str, username: str = None, password:
ssh.close()
async def notify_shellfish(alert: str):
key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b"
user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm"
iv = "ab5bbeb426015da7eedcee8bee3dffb7"
plain = "Secure ShellFish Notify 2.0\n" + base64.b64encode(alert.encode()).decode() + "\n"
if API.EXTENSIONS.shellfish == "on" or API.EXTENSIONS.shellfish == True:
async def notify_shellfish(alert: str):
key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b"
user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm"
iv = "ab5bbeb426015da7eedcee8bee3dffb7"
plain = "Secure ShellFish Notify 2.0\n" + base64.b64encode(alert.encode()).decode() + "\n"
openssl_command = [
"openssl", "enc", "-aes-256-cbc", "-base64", "-K", key, "-iv", iv
]
process = await asyncio.to_thread(subprocess.Popen, openssl_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = await asyncio.to_thread(process.communicate, plain.encode())
if process.returncode != 0:
raise Exception(f"OpenSSL encryption failed: {stderr.decode()}")
openssl_command = [
"openssl", "enc", "-aes-256-cbc", "-base64", "-K", key, "-iv", iv
]
process = await asyncio.to_thread(subprocess.Popen, openssl_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = await asyncio.to_thread(process.communicate, plain.encode())
if process.returncode != 0:
raise Exception(f"OpenSSL encryption failed: {stderr.decode()}")
base64_encoded = stdout.decode().strip()
base64_encoded = stdout.decode().strip()
url = f"https://secureshellfish.app/push/?user={user}&mutable"
headers = {"Content-Type": "text/plain"}
async with aiohttp.ClientSession() as session:
async with session.post(url, headers=headers, data=base64_encoded) as response:
if response.status != 200:
raise Exception(f"Failed to send notification: {response.status_code}")
## SHELLFISH ##
def shellfish_health_check():
addresses = [
"https://api.sij.ai/health",
"http://100.64.64.20:4444/health",
"http://100.64.64.30:4444/health",
"http://100.64.64.11:4444/health",
"http://100.64.64.15:4444/health"
]
results = []
up_count = 0
for address in addresses:
url = f"https://secureshellfish.app/push/?user={user}&mutable"
headers = {"Content-Type": "text/plain"}
async with aiohttp.ClientSession() as session:
async with session.post(url, headers=headers, data=base64_encoded) as response:
if response.status != 200:
raise Exception(f"Failed to send notification: {response.status_code}")
def shellfish_health_check():
addresses = [
"https://api.sij.ai/health",
"http://100.64.64.20:4444/health",
"http://100.64.64.30:4444/health",
"http://100.64.64.11:4444/health",
"http://100.64.64.15:4444/health"
]
results = []
up_count = 0
for address in addresses:
try:
response = requests.get(address)
if response.status_code == 200:
results.append(f"{address} is up")
up_count += 1
else:
results.append(f"{address} returned status code {response.status_code}")
except requests.exceptions.RequestException:
results.append(f"{address} is down")
# Generate a simple text-based graph
graph = '|' * up_count + '.' * (len(addresses) - up_count)
text_update = "\n".join(results)
widget_command = ["widget", "--text", text_update, "--text", f"Graph: {graph}", "--icon", "network"]
output = shellfish_run_widget_command(widget_command)
return {"output": output, "graph": graph}
def shellfish_update_widget(update: WidgetUpdate):
widget_command = ["widget"]
if update.text:
widget_command.extend(["--text", update.text])
if update.progress:
widget_command.extend(["--progress", update.progress])
if update.icon:
widget_command.extend(["--icon", update.icon])
if update.color:
widget_command.extend(["--color", update.color])
if update.url:
widget_command.extend(["--url", update.url])
if update.shortcut:
widget_command.extend(["--shortcut", update.shortcut])
if update.graph:
widget_command.extend(["--text", update.graph])
output = shellfish_run_widget_command(widget_command)
return {"output": output}
def shellfish_run_widget_command(args: List[str]):
    """Run the command given as an argument list and return its stdout.

    The list is executed directly (no shell). With ``shell=True`` a list's
    items after the first are handed to the shell itself on POSIX, so the
    command would run without any of its arguments.

    Raises:
        HTTPException: status 500 carrying stderr when the command exits
            with a non-zero return code.
    """
    result = subprocess.run(args, capture_output=True, text=True)
    if result.returncode != 0:
        raise HTTPException(status_code=500, detail=result.stderr)
    return result.stdout
if API.EXTENSIONS.courtlistener == "on" or API.EXTENSIONS.courtlistener == True:
with open(CASETABLE_PATH, 'r') as file:
CASETABLE = json.load(file)
@serve.post("/cl/search")
async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks):
client_ip = request.client.host
debug(f"Received request from IP: {client_ip}")
data = await request.json()
payload = data['payload']
results = data['payload']['results']
# Save the payload data
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_search.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
bg_tasks.add_task(cl_search_process_result, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
@serve.post("/cl/docket")
async def hook_cl_docket(request: Request, bg_tasks: BackgroundTasks):
    """Webhook endpoint receiving CourtListener docket alerts.

    Logs the caller's IP, decodes the JSON body and hands it to
    ``cl_docket`` together with the request's background-task queue.
    ``cl_docket`` requires that queue as its third argument, so it is
    declared here as a parameter and forwarded.

    Returns:
        The response produced by ``cl_docket``.
    """
    client_ip = request.client.host
    debug(f"Received request from IP: {client_ip}")
    data = await request.json()
    return await cl_docket(data, client_ip, bg_tasks)
async def cl_docket(data, client_ip, bg_tasks: BackgroundTasks):
payload = data['payload']
results = data['payload']['results']
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_docket.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
bg_tasks.add_task(cl_docket_process, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
async def cl_docket_process(result):
async with httpx.AsyncClient() as session:
await cl_docket_process_result(result, session)
async def cl_docket_process_result(result, session):
docket = str(result.get('docket'))
case_code, case_shortname = cl_case_details(docket)
date_filed = result.get('date_filed', 'No Date Filed')
try:
response = requests.get(address)
if response.status_code == 200:
results.append(f"{address} is up")
up_count += 1
date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d')
except ValueError:
date_filed_formatted = 'NoDateFiled'
# Fetching court docket information from the API
url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status == 200:
debug(f"Fetching CourtListener docket information for {docket}...")
data = await response.json()
court_docket = data['results'][0]['docket_number_core']
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
case_name = data['results'][0]['case_name']
debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
else:
debug("Failed to fetch data from CourtListener API.")
court_docket = 'NoCourtDocket'
case_name = 'NoCaseName'
for document in result.get('recap_documents', []):
filepath_ia = document.get('filepath_ia')
filepath_local = document.get('filepath_local')
if filepath_ia:
file_url = filepath_ia
debug(f"Found IA file at {file_url}.")
elif filepath_local:
file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
debug(f"Found local file at {file_url}.")
else:
results.append(f"{address} returned status code {response.status_code}")
except requests.exceptions.RequestException:
results.append(f"{address} is down")
# Generate a simple text-based graph
graph = '|' * up_count + '.' * (len(addresses) - up_count)
text_update = "\n".join(results)
widget_command = ["widget", "--text", text_update, "--text", f"Graph: {graph}", "--icon", "network"]
output = shellfish_run_widget_command(widget_command)
return {"output": output, "graph": graph}
debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
continue
document_number = document.get('document_number', 'NoDocumentNumber')
description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_")
description = description[:50] # Truncate description
# case_shortname = case_name # TEMPORARY OVERRIDE
file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await cl_download_file(file_url, target_path, session)
debug(f"Downloaded {file_name} to {target_path}")
def cl_case_details(docket):
case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"})
case_code = case_info.get("code")
short_name = case_info.get("shortname")
return case_code, short_name
async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
}
async with aiohttp.ClientSession() as session:
debug(f"Attempting to download {url} to {path}.")
try:
async with session.get(url, headers=headers, allow_redirects=True) as response:
if response.status == 403:
err(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
return
response.raise_for_status()
# Check if the response content type is a PDF
content_type = response.headers.get('Content-Type')
if content_type != 'application/pdf':
err(f"Invalid content type: {content_type}. Skipping download.")
return
# Create an in-memory buffer to store the downloaded content
buffer = io.BytesIO()
async for chunk in response.content.iter_chunked(1024):
buffer.write(chunk)
# Reset the buffer position to the beginning
buffer.seek(0)
# Validate the downloaded PDF content
try:
PdfReader(buffer)
except Exception as e:
err(f"Invalid PDF content: {str(e)}. Skipping download.")
return
# If the PDF is valid, write the content to the file on disk
path.parent.mkdir(parents=True, exist_ok=True)
with path.open('wb') as file:
file.write(buffer.getvalue())
except Exception as e:
err(f"Error downloading file: {str(e)}")
def shellfish_update_widget(update: WidgetUpdate):
widget_command = ["widget"]
async def cl_search_process_result(result):
async with httpx.AsyncClient() as session:
download_url = result.get('download_url')
court_id = result.get('court_id')
case_name_short = result.get('caseNameShort')
case_name = result.get('caseName')
debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")
if update.text:
widget_command.extend(["--text", update.text])
if update.progress:
widget_command.extend(["--progress", update.progress])
if update.icon:
widget_command.extend(["--icon", update.icon])
if update.color:
widget_command.extend(["--color", update.color])
if update.url:
widget_command.extend(["--url", update.url])
if update.shortcut:
widget_command.extend(["--shortcut", update.shortcut])
if update.graph:
widget_command.extend(["--text", update.graph])
court_folder = court_id
output = shellfish_run_widget_command(widget_command)
return {"output": output}
def shellfish_run_widget_command(args: List[str]):
result = subprocess.run(args, capture_output=True, text=True, shell=True)
if result.returncode != 0:
raise HTTPException(status_code=500, detail=result.stderr)
return result.stdout
### COURTLISTENER FUNCTIONS ###
async def cl_docket(data, client_ip, bg_tasks: BackgroundTasks):
payload = data['payload']
results = data['payload']['results']
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_docket.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
bg_tasks.add_task(cl_docket_process, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
async def cl_docket_process(result):
async with httpx.AsyncClient() as session:
await cl_docket_process_result(result, session)
async def cl_docket_process_result(result, session):
docket = str(result.get('docket'))
case_code, case_shortname = cl_case_details(docket)
date_filed = result.get('date_filed', 'No Date Filed')
try:
date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d')
except ValueError:
date_filed_formatted = 'NoDateFiled'
# Fetching court docket information from the API
url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status == 200:
logger.debug(f"Fetching CourtListener docket information for {docket}...")
data = await response.json()
court_docket = data['results'][0]['docket_number_core']
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
case_name = data['results'][0]['case_name']
logger.debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
if case_name_short:
case_folder = case_name_short
else:
logger.debug("Failed to fetch data from CourtListener API.")
court_docket = 'NoCourtDocket'
case_name = 'NoCaseName'
case_folder = case_name
for document in result.get('recap_documents', []):
filepath_ia = document.get('filepath_ia')
filepath_local = document.get('filepath_local')
file_name = download_url.split('/')[-1]
target_path = Path(COURTLISTENER_SEARCH_DIR) / court_folder / case_folder / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
if filepath_ia:
file_url = filepath_ia
logger.debug(f"Found IA file at {file_url}.")
elif filepath_local:
file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
logger.debug(f"Found local file at {file_url}.")
else:
logger.debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
continue
document_number = document.get('document_number', 'NoDocumentNumber')
description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_")
description = description[:50] # Truncate description
# case_shortname = case_name # TEMPORARY OVERRIDE
file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await cl_download_file(file_url, target_path, session)
logger.debug(f"Downloaded {file_name} to {target_path}")
def cl_case_details(docket):
case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"})
case_code = case_info.get("code")
short_name = case_info.get("shortname")
return case_code, short_name
async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
}
async with aiohttp.ClientSession() as session:
logger.debug(f"Attempting to download {url} to {path}.")
try:
async with session.get(url, headers=headers, allow_redirects=True) as response:
if response.status == 403:
logger.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
return
response.raise_for_status()
# Check if the response content type is a PDF
content_type = response.headers.get('Content-Type')
if content_type != 'application/pdf':
logger.error(f"Invalid content type: {content_type}. Skipping download.")
return
# Create an in-memory buffer to store the downloaded content
buffer = io.BytesIO()
async for chunk in response.content.iter_chunked(1024):
buffer.write(chunk)
# Reset the buffer position to the beginning
buffer.seek(0)
# Validate the downloaded PDF content
try:
PdfReader(buffer)
except Exception as e:
logger.error(f"Invalid PDF content: {str(e)}. Skipping download.")
return
# If the PDF is valid, write the content to the file on disk
path.parent.mkdir(parents=True, exist_ok=True)
with path.open('wb') as file:
file.write(buffer.getvalue())
except Exception as e:
logger.error(f"Error downloading file: {str(e)}")
async def cl_search_process_result(result):
async with httpx.AsyncClient() as session:
download_url = result.get('download_url')
court_id = result.get('court_id')
case_name_short = result.get('caseNameShort')
case_name = result.get('caseName')
logger.debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")
court_folder = court_id
if case_name_short:
case_folder = case_name_short
else:
case_folder = case_name
file_name = download_url.split('/')[-1]
target_path = Path(COURTLISTENER_SEARCH_DIR) / court_folder / case_folder / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await cl_download_file(download_url, target_path, session)
logger.debug(f"Downloaded {file_name} to {target_path}")
await cl_download_file(download_url, target_path, session)
debug(f"Downloaded {file_name} to {target_path}")

View file

@ -7,25 +7,27 @@ from semaphore import Bot, ChatContext
from sijapi import L
signal = APIRouter()
logger = L.get_module_logger("signal")
def debug(text: str):
    """Emit *text* on the module logger at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Emit *text* on the module logger at INFO level."""
    logger.info(text)


def warn(text: str):
    """Emit *text* on the module logger at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Emit *text* on the module logger at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Emit *text* on the module logger at CRITICAL level."""
    logger.critical(text)
async def echo(ctx: ChatContext) -> None:
    """Reply to a non-empty incoming message with its own body.

    The typing indicator is started before the reply is sent and stopped
    afterwards. Empty messages are ignored.
    """
    if ctx.message.empty():
        return

    await ctx.message.typing_started()
    body = ctx.message.get_body()
    await ctx.message.reply(body)
    await ctx.message.typing_stopped()
async def main() -> None:
    """Start the bot."""
    # The number the bot connects to is read from the environment.
    phone_number = os.environ["SIGNAL_PHONE_NUMBER"]
    async with Bot(phone_number) as bot:
        # An empty pattern registers echo as the handler.
        bot.register_handler("", echo)
        # Run the bot until you press Ctrl-C.
        await bot.start()
if __name__ == '__main__':
import anyio
anyio.run(main)

View file

@ -26,11 +26,18 @@ from collections import defaultdict
from dotenv import load_dotenv
from traceback import format_exc
from sijapi import L, TIMING_API_KEY, TIMING_API_URL
from sijapi.routers import loc
from sijapi.routers import gis
time = APIRouter(tags=["private"])
logger = L.get_module_logger("time")
def debug(text: str):
    """Emit *text* on the module logger at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Emit *text* on the module logger at INFO level."""
    logger.info(text)


def warn(text: str):
    """Emit *text* on the module logger at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Emit *text* on the module logger at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Emit *text* on the module logger at CRITICAL level."""
    logger.critical(text)
script_directory = os.path.dirname(os.path.abspath(__file__))
# Configuration constants
@ -58,17 +65,17 @@ async def post_time_entry_to_timing(entry: Dict):
'Accept': 'application/json',
'X-Time-Zone': 'America/Los_Angeles'
}
logger.debug(f"Received entry: {entry}")
debug(f"Received entry: {entry}")
response = None # Initialize response
try:
async with httpx.AsyncClient() as client:
response = await client.post(url, headers=headers, json=entry)
response.raise_for_status() # This will only raise for 4xx and 5xx responses
except httpx.HTTPStatusError as exc:
logger.debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text))
except Exception as exc:
logger.debug(f"General exception caught: {exc}")
debug(f"General exception caught: {exc}")
raise HTTPException(status_code=500, detail="An unexpected error occurred")
if response:
@ -97,8 +104,8 @@ def truncate_project_title(title):
async def fetch_and_prepare_timing_data(start: datetime, end: Optional[datetime] = None) -> List[Dict]:
# start_date = await loc.dt(start)
# end_date = await loc.dt(end) if end else None
# start_date = await gis.dt(start)
# end_date = await gis.dt(end) if end else None
# Adjust the start date to include the day before and format the end date
start_date_adjusted = (start - timedelta(days=1)).strftime("%Y-%m-%dT00:00:00")
end_date_formatted = f"{datetime.strftime(end, '%Y-%m-%d')}T23:59:59" if end else f"{datetime.strftime(start, '%Y-%m-%d')}T23:59:59"
@ -312,8 +319,8 @@ async def get_timing_markdown3(
):
# Fetch and process timing data
start = await loc.dt(start_date)
end = await loc.dt(end_date) if end_date else None
start = await gis.dt(start_date)
end = await gis.dt(end_date) if end_date else None
timing_data = await fetch_and_prepare_timing_data(start, end)
# Retain these for processing Markdown data with the correct timezone
@ -372,8 +379,8 @@ async def get_timing_markdown(
start: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
end: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
start_date = await loc.dt(start)
end_date = await loc.dt(end)
start_date = await gis.dt(start)
end_date = await gis.dt(end)
markdown_formatted_data = await process_timing_markdown(start_date, end_date)
return Response(content=markdown_formatted_data, media_type="text/markdown")
@ -441,8 +448,8 @@ async def get_timing_json(
):
# Fetch and process timing data
start = await loc.dt(start_date)
end = await loc.dt(end_date)
start = await gis.dt(start_date)
end = await gis.dt(end_date)
timing_data = await fetch_and_prepare_timing_data(start, end)
# Convert processed data to the required JSON structure

View file

@ -31,6 +31,12 @@ from sijapi.utilities import sanitize_filename
### INITIALIZATIONS ###
tts = APIRouter(tags=["trusted", "private"])
logger = L.get_module_logger("tts")
def debug(text: str):
    """Emit *text* on the module logger at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Emit *text* on the module logger at INFO level."""
    logger.info(text)


def warn(text: str):
    """Emit *text* on the module logger at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Emit *text* on the module logger at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Emit *text* on the module logger at CRITICAL level."""
    logger.critical(text)
DEVICE = torch.device('cpu')
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
@ -47,7 +53,7 @@ async def list_11l_voices():
async with httpx.AsyncClient() as client:
try:
response = await client.get(url, headers=headers)
logger.debug(f"Response: {response}")
debug(f"Response: {response}")
if response.status_code == 200:
voices_data = response.json().get("voices", [])
formatted_list = ""
@ -57,7 +63,7 @@ async def list_11l_voices():
formatted_list += f"{name}: `{id}`\n"
except Exception as e:
logger.error(f"Error determining voice ID: {str(e)}")
err(f"Error determining voice ID: {str(e)}")
return PlainTextResponse(formatted_list, status_code=200)
@ -67,18 +73,18 @@ async def select_voice(voice_name: str) -> str:
try:
# Case Insensitive comparison
voice_name_lower = voice_name.lower()
logger.debug(f"Looking for {voice_name_lower}")
debug(f"Looking for {voice_name_lower}")
for item in VOICE_DIR.iterdir():
logger.debug(f"Checking {item.name.lower()}")
debug(f"Checking {item.name.lower()}")
if item.name.lower() == f"{voice_name_lower}.wav":
logger.debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.")
debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.")
return str(item)
logger.error(f"Voice file not found")
err(f"Voice file not found")
raise HTTPException(status_code=404, detail="Voice file not found")
except Exception as e:
logger.error(f"Voice file not found: {str(e)}")
err(f"Voice file not found: {str(e)}")
return None
@ -113,8 +119,8 @@ async def generate_speech_endpoint(
else:
return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast)
except Exception as e:
logger.error(f"Error in TTS: {str(e)}")
logger.error(traceback.format_exc())
err(f"Error in TTS: {str(e)}")
err(traceback.format_exc())
raise HTTPException(status_code=666, detail="error in TTS")
async def generate_speech(
@ -136,18 +142,20 @@ async def generate_speech(
model = model if model else await get_model(voice, voice_file)
title = title if title else "TTS audio"
output_path = output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav"
if model == "eleven_turbo_v2":
logger.info("Using ElevenLabs.")
info("Using ElevenLabs.")
audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir)
else: # if model == "xtts":
logger.info("Using XTTS2")
else: # if model == "xtts":
info("Using XTTS2")
audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_path)
#else:
# raise HTTPException(status_code=400, detail="Invalid model specified")
if podcast == True:
podcast_path = Path(PODCAST_DIR) / audio_file_path.name
logger.debug(f"Podcast path: {podcast_path}")
if not audio_file_path:
raise ValueError("TTS generation failed: audio_file_path is empty or None")
if podcast:
podcast_path = Path(PODCAST_DIR) / Path(audio_file_path).name
debug(f"Podcast path: {podcast_path}")
shutil.copy(str(audio_file_path), str(podcast_path))
bg_tasks.add_task(os.remove, str(audio_file_path))
return str(podcast_path)
@ -155,11 +163,12 @@ async def generate_speech(
return str(audio_file_path)
except Exception as e:
logger.error(f"Failed to generate speech: {str(e)}")
err(f"Failed to generate speech: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}")
async def get_model(voice: str = None, voice_file: UploadFile = None):
if voice_file or (voice and await select_voice(voice)):
return "xtts"
@ -171,7 +180,7 @@ async def get_model(voice: str = None, voice_file: UploadFile = None):
raise HTTPException(status_code=400, detail="No model or voice specified")
async def determine_voice_id(voice_name: str) -> str:
logger.debug(f"Searching for voice id for {voice_name}")
debug(f"Searching for voice id for {voice_name}")
hardcoded_voices = {
"alloy": "E3A1KVbKoWSIKSZwSUsW",
@ -188,23 +197,23 @@ async def determine_voice_id(voice_name: str) -> str:
if voice_name in hardcoded_voices:
voice_id = hardcoded_voices[voice_name]
logger.debug(f"Found voice ID - {voice_id}")
debug(f"Found voice ID - {voice_id}")
return voice_id
logger.debug(f"Requested voice not among the hardcoded options.. checking with 11L next.")
debug(f"Requested voice not among the hardcoded options.. checking with 11L next.")
url = "https://api.elevenlabs.io/v1/voices"
headers = {"xi-api-key": ELEVENLABS_API_KEY}
async with httpx.AsyncClient() as client:
try:
response = await client.get(url, headers=headers)
logger.debug(f"Response: {response}")
debug(f"Response: {response}")
if response.status_code == 200:
voices_data = response.json().get("voices", [])
for voice in voices_data:
if voice_name == voice["voice_id"] or voice_name == voice["name"]:
return voice["voice_id"]
except Exception as e:
logger.error(f"Error determining voice ID: {str(e)}")
err(f"Error determining voice ID: {str(e)}")
# as a last fallback, rely on David Attenborough
return "b42GBisbu9r5m5n6pHF7"
@ -248,7 +257,7 @@ async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> s
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
if voice:
logger.debug(f"Looking for voice: {voice}")
debug(f"Looking for voice: {voice}")
selected_voice = await select_voice(voice)
return selected_voice
elif voice_file and isinstance(voice_file, UploadFile):
@ -277,7 +286,7 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None)
return str(new_file)
else:
logger.debug(f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}")
debug(f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}")
selected_voice = await select_voice(DEFAULT_VOICE)
return selected_voice
@ -315,7 +324,7 @@ async def local_tts(
for i, segment in enumerate(segments):
segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
logger.debug(f"Segment file path: {segment_file_path}")
debug(f"Segment file path: {segment_file_path}")
# Run TTS in a separate thread
await asyncio.to_thread(
@ -326,7 +335,7 @@ async def local_tts(
speaker_wav=[voice_file_path],
language="en"
)
logger.debug(f"Segment file generated: {segment_file_path}")
debug(f"Segment file generated: {segment_file_path}")
# Load and combine audio in a separate thread
segment_audio = await asyncio.to_thread(AudioSegment.from_wav, str(segment_file_path))
@ -401,7 +410,7 @@ def split_text(text, target_length=35, max_length=50):
if segment_length + len(sentence_words) > max_length:
segments.append(' '.join(current_segment))
logger.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}")
debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}")
current_segment = [sentence]
else:
@ -409,7 +418,7 @@ def split_text(text, target_length=35, max_length=50):
if current_segment:
segments.append(' '.join(current_segment))
logger.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
return segments
@ -421,7 +430,7 @@ def clean_text_for_tts(text: str) -> str:
text = re.sub(r'\s+', ' ', text).strip()
return text
else:
logger.debug(f"No text received.")
debug(f"No text received.")

View file

@ -13,10 +13,15 @@ from shapely.wkb import loads
from binascii import unhexlify
from sijapi import L, VISUALCROSSING_API_KEY, TZ, DB, GEO
from sijapi.utilities import haversine
from sijapi.routers import loc
from sijapi.routers import gis
weather = APIRouter()
logger = L.get_module_logger("weather")
def debug(text: str):
    """Emit *text* on the module logger at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Emit *text* on the module logger at INFO level."""
    logger.info(text)


def warn(text: str):
    """Emit *text* on the module logger at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Emit *text* on the module logger at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Emit *text* on the module logger at CRITICAL level."""
    logger.critical(text)
@weather.get("/weather/refresh", response_class=JSONResponse)
async def get_refreshed_weather(
@ -26,16 +31,16 @@ async def get_refreshed_weather(
# date = await date
try:
if latlon == "None":
date_time = await loc.dt(date)
place = await loc.fetch_last_location_before(date_time)
date_time = await gis.dt(date)
place = await gis.fetch_last_location_before(date_time)
lat = place.latitude
lon = place.longitude
else:
lat, lon = latlon.split(',')
tz = await GEO.tz_at(lat, lon)
date_time = await loc.dt(date, tz)
date_time = await gis.dt(date, tz)
logger.debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
day = await get_weather(date_time, lat, lon, force_refresh=True)
day_str = str(day)
return JSONResponse(content={"weather": day_str}, status_code=200)
@ -44,20 +49,18 @@ async def get_refreshed_weather(
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
except Exception as e:
logger.error(f"Error in note_weather_get: {str(e)}")
err(f"Error in note_weather_get: {str(e)}")
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
async def get_weather(date_time: dt_datetime, latitude: float, longitude: float, force_refresh: bool = False):
logger.debug(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}")
logger.warning(f"Using {date_time} as our datetime in get_weather.")
fetch_new_data = True
if force_refresh == False:
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
if daily_weather_data:
try:
logger.debug(f"Daily weather data from db: {daily_weather_data}")
debug(f"Daily weather data from db: {daily_weather_data}")
last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
last_updated = await loc.dt(last_updated)
last_updated = await gis.dt(last_updated)
stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
stored_loc = loads(stored_loc_data)
stored_lat = stored_loc.y
@ -65,68 +68,64 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
stored_ele = stored_loc.z
hourly_weather = daily_weather_data.get('HourlyWeather')
logger.debug(f"Hourly: {hourly_weather}")
logger.debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n")
# debug(f"Hourly: {hourly_weather}")
request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
logger.debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
if last_updated and (date_time <= dt_datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
logger.debug(f"We can use existing data... :')")
debug(f"We can use existing data... :')")
fetch_new_data = False
except Exception as e:
logger.error(f"Error in get_weather: {e}")
err(f"Error in get_weather: {e}")
if fetch_new_data:
logger.debug(f"We require new data!")
debug(f"We require new data!")
request_date_str = date_time.strftime("%Y-%m-%d")
logger.warning(f"Using {date_time.strftime('%Y-%m-%d')} as our datetime for fetching new data.")
debug(f"Using {date_time.strftime('%Y-%m-%d')} as our datetime for fetching new data.")
url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
try:
async with AsyncClient() as client:
response = await client.get(url)
if response.status_code == 200:
logger.debug(f"Successfully obtained data from VC...")
debug(f"Successfully obtained data from VC...")
try:
weather_data = response.json()
store_result = await store_weather_to_db(date_time, weather_data)
if store_result == "SUCCESS":
logger.debug(f"New weather data for {request_date_str} stored in database...")
debug(f"New weather data for {request_date_str} stored in database...")
else:
logger.error(f"Failed to store weather data for {request_date_str} in database! {store_result}")
err(f"Failed to store weather data for {request_date_str} in database! {store_result}")
logger.debug(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}")
debug(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}")
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
if daily_weather_data is not None:
return daily_weather_data
else:
raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
except Exception as e:
logger.error(f"Problem parsing VC response or storing data: {e}")
err(f"Problem parsing VC response or storing data: {e}")
raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
else:
logger.error(f"Failed to fetch weather data: {response.status_code}, {response.text}")
err(f"Failed to fetch weather data: {response.status_code}, {response.text}")
except Exception as e:
logger.error(f"Exception during API call: {e}")
err(f"Exception during API call: {e}")
return daily_weather_data
async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in store_weather_to_db")
warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in store_weather_to_db")
async with DB.get_connection() as conn:
try:
day_data = weather_data.get('days')[0]
logger.debug(f"RAW DAY_DATA: {day_data}")
debug(f"RAW DAY_DATA: {day_data}")
# Handle preciptype and stations as PostgreSQL arrays
preciptype_array = day_data.get('preciptype', []) or []
stations_array = day_data.get('stations', []) or []
date_str = date_time.strftime("%Y-%m-%d")
logger.warning(f"Using {date_str} in our query in store_weather_to_db.")
warn(f"Using {date_str} in our query in store_weather_to_db.")
# Get location details from weather data if available
longitude = weather_data.get('longitude')
@ -135,11 +134,11 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
elevation = await GEO.elevation(latitude, longitude)
location_point = f"POINTZ({longitude} {latitude} {elevation})" if longitude and latitude and elevation else None
logger.warning(f"Uncorrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}")
day_data['datetime'] = await loc.dt(day_data.get('datetimeEpoch'))
day_data['sunrise'] = await loc.dt(day_data.get('sunriseEpoch'))
day_data['sunset'] = await loc.dt(day_data.get('sunsetEpoch'))
logger.warning(f"Corrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}")
warn(f"Uncorrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}")
day_data['datetime'] = await gis.dt(day_data.get('datetimeEpoch'))
day_data['sunrise'] = await gis.dt(day_data.get('sunriseEpoch'))
day_data['sunset'] = await gis.dt(day_data.get('sunsetEpoch'))
warn(f"Corrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}")
daily_weather_params = (
day_data.get('sunrise'), day_data.get('sunriseEpoch'),
@ -163,7 +162,7 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
location_point
)
except Exception as e:
logger.error(f"Failed to prepare database query in store_weather_to_db! {e}")
err(f"Failed to prepare database query in store_weather_to_db! {e}")
try:
daily_weather_query = '''
@ -183,11 +182,11 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params)
if 'hours' in day_data:
logger.debug(f"Processing hours now...")
debug(f"Processing hours now...")
for hour_data in day_data['hours']:
try:
await asyncio.sleep(0.01)
hour_data['datetime'] = await loc.dt(hour_data.get('datetimeEpoch'))
hour_data['datetime'] = await gis.dt(hour_data.get('datetimeEpoch'))
hour_preciptype_array = hour_data.get('preciptype', []) or []
hour_stations_array = hour_data.get('stations', []) or []
hourly_weather_params = (
@ -229,22 +228,22 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
'''
async with conn.transaction():
hourly_weather_id = await conn.fetchval(hourly_weather_query, *hourly_weather_params)
logger.debug(f"Done processing hourly_weather_id {hourly_weather_id}")
debug(f"Done processing hourly_weather_id {hourly_weather_id}")
except Exception as e:
logger.error(f"EXCEPTION: {e}")
err(f"EXCEPTION: {e}")
except Exception as e:
logger.error(f"EXCEPTION: {e}")
err(f"EXCEPTION: {e}")
return "SUCCESS"
except Exception as e:
logger.error(f"Error in dailyweather storage: {e}")
err(f"Error in dailyweather storage: {e}")
async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude: float):
logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.")
warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.")
async with DB.get_connection() as conn:
query_date = date_time.date()
try:
@ -260,19 +259,19 @@ async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude
daily_weather_record = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude)
if daily_weather_record is None:
logger.debug(f"No daily weather data retrieved from database.")
debug(f"No daily weather data retrieved from database.")
return None
# Convert asyncpg.Record to a mutable dictionary
daily_weather_data = dict(daily_weather_record)
# logger.debug(f"Daily weather data prior to tz corrections: {daily_weather_data}")
# debug(f"Daily weather data prior to tz corrections: {daily_weather_data}")
# Now we can modify the dictionary
# tz = await GEO.tz_at(latitude, longitude)
# daily_weather_data['datetime'] = await loc.dt(daily_weather_data.get('datetime'), tz)
# daily_weather_data['sunrise'] = await loc.dt(daily_weather_data.get('sunrise'), tz)
# daily_weather_data['sunset'] = await loc.dt(daily_weather_data.get('sunset'), tz)
# daily_weather_data['datetime'] = await gis.dt(daily_weather_data.get('datetime'), tz)
# daily_weather_data['sunrise'] = await gis.dt(daily_weather_data.get('sunrise'), tz)
# daily_weather_data['sunset'] = await gis.dt(daily_weather_data.get('sunset'), tz)
# logger.debug(f"Daily weather data after tz corrections: {daily_weather_data}")
# debug(f"Daily weather data after tz corrections: {daily_weather_data}")
# Query to get hourly weather data
query = '''
@ -285,17 +284,17 @@ async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude
hourly_weather_data = []
for record in hourly_weather_records:
hour_data = dict(record)
# hour_data['datetime'] = await loc.dt(hour_data.get('datetime'), tz)
# hour_data['datetime'] = await gis.dt(hour_data.get('datetime'), tz)
hourly_weather_data.append(hour_data)
# logger.debug(f"Hourly weather data after tz corrections: {hourly_weather_data}")
# debug(f"Hourly weather data after tz corrections: {hourly_weather_data}")
day = {
'DailyWeather': daily_weather_data,
'HourlyWeather': hourly_weather_data,
}
# logger.debug(f"day: {day}")
# debug(f"day: {day}")
return day
except Exception as e:
logger.error(f"Unexpected error occurred: {e}")
err(f"Unexpected error occurred: {e}")

View file

@ -30,17 +30,22 @@ from fastapi.security.api_key import APIKeyHeader
from sijapi import L, API, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR
logger = L.get_module_logger('utilities')
def debug(text: str):
    """Emit *text* on the module logger at DEBUG level."""
    logger.debug(text)


def info(text: str):
    """Emit *text* on the module logger at INFO level."""
    logger.info(text)


def warn(text: str):
    """Emit *text* on the module logger at WARNING level."""
    logger.warning(text)


def err(text: str):
    """Emit *text* on the module logger at ERROR level."""
    logger.error(text)


def crit(text: str):
    """Emit *text* on the module logger at CRITICAL level."""
    logger.critical(text)
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
def validate_api_key(request: Request, api_key: str = Depends(api_key_header)):
if request.url.path in API.PUBLIC:
return
return
client_ip = ipaddress.ip_address(request.client.host)
trusted_subnets = [ipaddress.ip_network(subnet) for subnet in API.TRUSTED_SUBNETS]
if any(client_ip in subnet for subnet in trusted_subnets):
return
return
# Check header-based API key
if api_key:
@ -63,18 +68,18 @@ def assemble_archive_path(filename: str, extension: str = ".md", date_time: date
day = date_time.strftime(DAY_FMT)
day_short = date_time.strftime(DAY_SHORT_FMT)
timestamp = date_time.strftime("%H%M%S")
# Ensure the extension is preserved
base_name, ext = os.path.splitext(filename)
extension = ext if ext else extension
# Initial sanitization
sanitized_base = sanitize_filename(base_name, '')
filename = f"{day_short} {timestamp} {sanitized_base}{extension}"
relative_path = Path(year) / month / day / filename
absolute_path = ARCHIVE_DIR / relative_path
# Ensure the total path length doesn't exceed MAX_PATH_LENGTH
while len(str(absolute_path)) > MAX_PATH_LENGTH:
# Truncate the sanitized_base, not the full filename
@ -82,7 +87,7 @@ def assemble_archive_path(filename: str, extension: str = ".md", date_time: date
filename = f"{day_short} {timestamp} {sanitized_base}{extension}"
relative_path = Path(year) / month / day / filename
absolute_path = ARCHIVE_DIR / relative_path
return absolute_path, relative_path
@ -111,17 +116,17 @@ def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str
extension = extension if extension.startswith(".") else f".{extension}"
else:
extension = validate_extension(filename, [".md", ".m4a", ".wav", ".aiff", ".flac", ".mp3", ".mp4", ".pdf", ".js", ".json", ".yaml", ".py"]) or ".md"
filename = sanitize_filename(filename)
filename = f"{day_short} {filename}" if no_timestamp else f"{day_short} {timestamp} {filename}"
filename = f"{filename}{extension}" if not filename.endswith(extension) else filename
relative_path = relative_path / filename
else:
logger.debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.")
debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.")
return None, None
absolute_path = OBSIDIAN_VAULT_DIR / relative_path
absolute_path = OBSIDIAN_VAULT_DIR / relative_path
os.makedirs(absolute_path.parent, exist_ok=True)
return absolute_path, relative_path
@ -142,8 +147,8 @@ def f(file):
if hasattr(file, 'read') and callable(file.read):
return file
if isinstance(file, (bytes, bytearray)):
return file
return file
if isinstance(file, Path):
file_path = file
elif isinstance(file, str):
@ -165,16 +170,16 @@ def get_extension(file):
file_path = Path(file.filename)
file_extension = file_path.suffix
return file_extension
except Exception as e:
logger.error(f"Unable to get extension of {file}")
err(f"Unable to get extension of {file}")
raise e
def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LENGTH):
"""Sanitize a string to be used as a safe filename while protecting the file extension."""
logger.debug(f"Filename before sanitization: {text}")
debug(f"Filename before sanitization: {text}")
text = re.sub(r'\s+', ' ', text)
sanitized = re.sub(ALLOWED_FILENAME_CHARS, '', text)
@ -186,7 +191,7 @@ def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LE
base_name = base_name[:max_base_length - 5].rstrip()
final_filename = base_name + extension
logger.debug(f"Filename after sanitization: {final_filename}")
debug(f"Filename after sanitization: {final_filename}")
return final_filename
@ -196,16 +201,16 @@ def check_file_name(file_name, max_length=255):
needs_sanitization = False
if len(file_name) > max_length:
logger.debug(f"Filename exceeds maximum length of {max_length}: {file_name}")
debug(f"Filename exceeds maximum length of {max_length}: {file_name}")
needs_sanitization = True
if re.search(ALLOWED_FILENAME_CHARS, file_name):
logger.debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}")
debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}")
needs_sanitization = True
if re.search(r'\s{2,}', file_name):
logger.debug(f"Filename contains multiple consecutive spaces: {file_name}")
debug(f"Filename contains multiple consecutive spaces: {file_name}")
needs_sanitization = True
if file_name != file_name.strip():
logger.debug(f"Filename has leading or trailing spaces: {file_name}")
debug(f"Filename has leading or trailing spaces: {file_name}")
needs_sanitization = True
return needs_sanitization
@ -230,7 +235,7 @@ async def extract_text(file_path: str) -> str:
"""Extract text from file."""
if file_path.endswith('.pdf'):
return await extract_text_from_pdf(file_path)
elif file_path.endswith('.docx'):
return await extract_text_from_docx(file_path)
@ -248,13 +253,13 @@ async def ocr_pdf(file_path: str) -> str:
texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, image) for image in images))
return ' '.join(texts)
except Exception as e:
logger.error(f"Error during OCR: {str(e)}")
err(f"Error during OCR: {str(e)}")
return ""
async def extract_text_from_pdf(file_path: str) -> str:
if not await is_valid_pdf(file_path):
logger.error(f"Invalid PDF file: {file_path}")
err(f"Invalid PDF file: {file_path}")
return ""
text = ''
@ -267,12 +272,12 @@ async def extract_text_from_pdf(file_path: str) -> str:
text_content = page.extract_text() + ' ' if page.extract_text() else ''
text += text_content
num_pages = len(reader.pages)
# If text was extracted successfully and it's deemed sufficient, return it
if text and not should_use_ocr(text, num_pages):
return clean_text(text)
except Exception as e:
logger.error(f"Error extracting text with PyPDF2: {str(e)}")
err(f"Error extracting text with PyPDF2: {str(e)}")
# If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six
try:
@ -280,10 +285,10 @@ async def extract_text_from_pdf(file_path: str) -> str:
if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages):
return clean_text(text_pdfminer)
except Exception as e:
logger.error(f"Error extracting text with pdfminer.six: {e}")
err(f"Error extracting text with pdfminer.six: {e}")
# If both methods fail or are deemed insufficient, use OCR as the last resort
logger.debug("Falling back to OCR for text extraction...")
debug("Falling back to OCR for text extraction...")
return await ocr_pdf(file_path)
async def is_valid_pdf(file_path: str) -> bool:
@ -292,12 +297,12 @@ async def is_valid_pdf(file_path: str) -> bool:
kind = filetype.guess(file_path)
return kind.mime == 'application/pdf'
except Exception as e:
logger.error(f"Error checking file type: {e}")
err(f"Error checking file type: {e}")
return False
async def extract_text_from_pdf(file_path: str) -> str:
if not await is_valid_pdf(file_path):
logger.error(f"Invalid PDF file: {file_path}")
err(f"Invalid PDF file: {file_path}")
return ""
text = ''
@ -309,23 +314,23 @@ async def extract_text_from_pdf(file_path: str) -> str:
if text.strip(): # Successfully extracted text
return clean_text(text)
except Exception as e:
logger.error(f"Error extracting text with PyPDF2: {str(e)}")
err(f"Error extracting text with PyPDF2: {str(e)}")
try:
text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
if text_pdfminer.strip(): # Successfully extracted text
return clean_text(text_pdfminer)
except Exception as e:
logger.error(f"Error extracting text with pdfminer.six: {str(e)}")
err(f"Error extracting text with pdfminer.six: {str(e)}")
# Fall back to OCR
logger.debug("Falling back to OCR for text extraction...")
debug("Falling back to OCR for text extraction...")
try:
images = convert_from_path(file_path)
ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images))
return ' '.join(ocr_texts).strip()
except Exception as e:
logger.error(f"OCR failed: {str(e)}")
err(f"OCR failed: {str(e)}")
return ""
async def extract_text_from_docx(file_path: str) -> str:
@ -333,7 +338,7 @@ async def extract_text_from_docx(file_path: str) -> str:
doc = Document(file_path)
full_text = [paragraph.text for paragraph in doc.paragraphs]
return '\n'.join(full_text)
return await asyncio.to_thread(read_docx, file_path)
# Correcting read_text_file to be asynchronous
@ -345,7 +350,7 @@ def _sync_read_text_file(file_path: str) -> str:
# Actual synchronous file reading operation
with open(file_path, 'r', encoding='utf-8') as file:
return file.read()
def should_use_ocr(text, num_pages) -> bool:
if not text:
return True # No text was extracted, use OCR
@ -377,7 +382,7 @@ def convert_degrees_to_cardinal(d):
"""
dirs = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"]
ix = round(d / (360. / len(dirs)))
return dirs[ix % len(dirs)]
return dirs[ix % len(dirs)]
@ -409,7 +414,7 @@ def convert_to_12_hour_format(datetime_obj_or_str):
datetime_obj_or_str = datetime_obj_or_str.strftime("%H:%M:%S")
else:
datetime_obj = datetime_obj_or_str
if isinstance(datetime_obj_or_str, str):
time24 = datetime_obj_or_str
else:
@ -427,8 +432,8 @@ def encode_image_to_base64(image_path):
byte_data = output_buffer.getvalue()
base64_str = base64.b64encode(byte_data).decode('utf-8')
return base64_str
else:
logger.debug(f"Error: File does not exist at {image_path}")
else:
debug(f"Error: File does not exist at {image_path}")
def resize_and_convert_image(image_path, max_size=2160, quality=80):
with Image.open(image_path) as img:
@ -458,7 +463,7 @@ def load_geonames_data(path: str):
names=columns,
low_memory=False
)
return data
async def run_ssh_command(server, command):
@ -472,5 +477,5 @@ async def run_ssh_command(server, command):
ssh.close()
return output, error
except Exception as e:
logger.error(f"SSH command failed for server {server.id}: {str(e)}")
raise
err(f"SSH command failed for server {server.id}: {str(e)}")
raise