diff --git a/data/calfire_thp_data.json b/data/calfire_thp_data.json new file mode 100644 index 0000000..f2ee893 --- /dev/null +++ b/data/calfire_thp_data.json @@ -0,0 +1,69 @@ +[ + { + "Land Owner": "SIERRA PACIFIC INDUSTRIES", + "Location": "HBM: T2N R3E S17 ; HBM: \nT2N R3E S18 ; HBM: T2N \nR3E S7 ; HBM: T2N R3E S8", + "PLSS Coordinates": [ + "HBM: T2N R3E S17", + "HBM: T2N R3E S8" + ] + }, + { + "Land Owner": "1/29/2024\n2/8/2024\n \n7/12/2024\n \n515.00\nBALLARD RESERVOIR\n(5526.510202);GRAVEN \nRESERVOIR\n(5526.510301);RALSTON \nGULCH(5526.510201)\nMDBM: T41N R10E S25 ; \nMDBM: T41N R10E S26 ; \nMDBM: T41N R10E S27 ; \nMDBM: T41N R10E S28 ; \nMDBM: T41N R10E S33 ; \nMDBM: T41N R10E S34 ; \nMDBM: T41N R10E S35 ; \nMDBM: T41N R10E S36 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY", + "Location": "MDBM:", + "PLSS Coordinates": [] + }, + { + "Land Owner": "2/16/2024\n2/22/2024\n \n7/26/2024\n \n520.00\nARMENTROUT FLAT \n(5526.620003)\n(5526.620003);JIMMERSON \nSPRING\n(5526.610104);MOSQUITO \nLAKE(5526.420403)\nMDBM: T40N R5E S13 ; \nMDBM: T40N R5E S14 ; \nMDBM: T40N R5E S22 ; \nMDBM: T40N R5E S23 ; \nMDBM: T40N R5E S24 ; \nMDBM: T40N R5E S25 ; \nMDBM: T40N R5E S26 ; \nMDBM: T40N R5E S36 ; \nMDBM: T40N R6E S19 ; \nMDBM: T40N R6E S30 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \nPage: \n7\n of \n10\n\nTimber Harvesting Plans\nHarvest Document\nReceived\nFiled\nApproval\nTentative \nEnd of \nPublic \nComments\nPublic \nComment \nPeriod \nClosed\nTotal\nAcres\nWatershed\nLocation\nLand Owner(s)", + "Location": "MDBM:", + "PLSS Coordinates": [] + }, + { + "Land Owner": "3/5/2024\n3/14/2024\n6/25/2024\n6/21/2024\n6/21/2024\n968.00\nLOWER BUTTE CREEK\n(5526.360103);POISON LAKE\n(5526.360201)\nMDBM: T33N R7E S16 ; \nMDBM: T33N R7E S17 ; \nMDBM: T33N R7E S18 ; \nMDBM: T33N R7E S19 ; \nMDBM: T33N R7E S20 ; \nMDBM: T33N R7E S33 ; \nMDBM: T33N R7E S4 ; \nMDBM: T33N R7E S5 ; \nMDBM: T33N R7E S8 ; \nMDBM: T33N R7E S9 ; \nMDBM: T34N R7E S27 ; \nMDBM: T34N R7E S33 ; \nMDBM: T34N R7E S34 ; \nMDBM: T34N R7E S35 ; \nMDBM: T34N R7E S36 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY", + "Location": "MDBM:", + "PLSS Coordinates": [] + }, + { + "Land Owner": "5/13/2024\n5/23/2024\n \n7/29/2024\n \n351.00\nCEDAR CREEK (1106.400710)\n(1106.400710);COPPER \nCREEK (1106.400704)\n(1106.400704);SQUIRREL \nGULCH(1106.400701)\nMDBM: T36N R7W S3 ; \nMDBM: T37N R7W S21 ; \nMDBM: T37N R7W S22 ; \nMDBM: T37N R7W S27 ; \nMDBM: T37N R7W S33 ; \nMDBM: T37N R7W S35 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY", + "Location": "MDBM:", + "PLSS Coordinates": [] + }, + { + "Land Owner": "4/10/2024\n4/18/2024\n \n5/13/2024\n \n362.00\nCHASE CREEK (8638.000201)\n(8638.000201)\nMDBM: T34N R12E S21 ; \nMDBM: T34N R12E S22 ; \nMDBM: T34N R12E S27 ; \nMDBM: T34N R12E S28 ; \nMDBM: T34N R12E S33 ; \nMDBM: T34N R12E S34 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \nPage: \n8\n of \n10\n\nTimber Harvesting Plans\nHarvest Document\nReceived\nFiled\nApproval\nTentative \nEnd of \nPublic \nComments\nPublic \nComment \nPeriod \nClosed\nTotal\nAcres\nWatershed\nLocation\nLand Owner(s)", + "Location": "MDBM:", + "PLSS Coordinates": [] + }, + { + "Land Owner": "6/28/2024\n7/3/2024\n \n7/29/2024\n \n500.00\nMCCARTY CREEK\n(5509.630203);PANTHER \nSPRING\n(5509.630202);REFUGE\n(5509.630201)\nMDBM: T27N R2E S1 ; \nMDBM: T27N R2E S10 ; \nMDBM: T27N R2E S11 ; \nMDBM: T27N R2E S2 ; \nMDBM: T27N R2E S3 ; \nMDBM: T27N R3E S5 ; \nMDBM: T27N R3E S6 ; \nMDBM: T28N R2E S34 ; \nMDBM: T28N R2E S35 ; \nMDBM: T28N R2E S36 ; \nMDBM: T28N R3E S31 ; \nMDBM: T28N R3E S32 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY", + "Location": "MDBM:", + "PLSS Coordinates": [] + }, + { + "Land Owner": "SIERRA PACIFIC INDUSTRIES", + "Location": "MDBM: T32N R1E S12 ; \nMDBM: T32N R1E S13 ; \nMDBM: T32N R1E S14 ; \nMDBM: T32N R1E S23 ; \nMDBM: T32N R1E S24 ; \nMDBM: T32N R2E S18 ; \nMDBM: T32N R2E S19 ; \nMDBM: T32N R2E S7", + "PLSS Coordinates": [ + "MDBM: T32N R1E S12", + "MDBM: T32N R1E S13", + "MDBM: T32N R1E S14", + "MDBM: T32N R1E S23", + "MDBM: T32N R1E S24", + "MDBM: T32N R2E S18", + "MDBM: T32N R2E S19", + "MDBM: T32N R2E S7" + ] + }, + { + "Land Owner": "7/19/2024\n \n \n8/5/2024\n \n562.00\nBILLIE GULCH\n(1106.400703);CLAWTON \nGULCH\n(1106.400804);HATCHET \nCREEK(1106.400705);HAY \nGULCH\n(1106.400808);NELSON \nCREEK (1106.400702)\n(1106.400702)\nMDBM: T36N R7W S13 ; \nMDBM: T36N R7W S23 ; \nMDBM: T36N R7W S25 ; \nMDBM: T36N R7W S27 ; \nMDBM: T36N R7W S33 ; \nMDBM: T36N R7W S34 ; \nMDBM: T36N R7W S35 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \n2-24NTMP-00004-SHA\n7/19/2024\n \n \n9/2/2024\n \n480.00\nLOWER SODA CREEK\n(5525.210202)\nMDBM: T38N R4W S11 ; \nMDBM: T38N R4W S12 ; \nMDBM: T38N R4W S14 ; \nMDBM: T38N R4W S2 \nCASTLE CRAGS LLC", + "Location": "MDBM: T38N R4W S11 ; \nMDBM: T38N R4W S12 ; \nMDBM: T38N R4W S14 ; \nMDBM: T38N R4W S2 \nCASTLE CRAGS LLC", + "PLSS Coordinates": [ + "MDBM: T38N R4W S11", + "MDBM: T38N R4W S12", + "MDBM: T38N R4W S14", + "MDBM: T38N R4W S2" + ] + }, + { + "Land Owner": "5/31/2024\n6/6/2024\n \n7/19/2024\n \n497.00\nLITTLE SILVER CREEK\n(5514.330206);PEAVINE \nCREEK(5514.330101);UNION \nVALLEY RESERVOIR\n(5514.340301)\nMDBM: T12N R14E S28 ; \nMDBM: T12N R14E S29 ; \nMDBM: T12N R14E S32 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \n81\n7/22/2024 12:18:13 PM\nPage: \n10\n of \n10", + "Location": "MDBM:", + "PLSS Coordinates": [] + } +] \ No newline at end of file diff --git a/sijapi/__init__.py b/sijapi/__init__.py index 26ed05f..8959769 100644 --- a/sijapi/__init__.py +++ b/sijapi/__init__.py @@ -1,16 +1,12 @@ # __init__.py import os -import json -import yaml from pathlib import Path import ipaddress import multiprocessing from dotenv import load_dotenv from dateutil import tz from pathlib import Path -from pydantic import BaseModel -from typing import List, Optional -from .classes import AutoResponder, IMAPConfig, SMTPConfig, EmailAccount, EmailContact, IncomingEmail, Database, Geocoder, APIConfig, Configuration +from .classes import Database, Geocoder, APIConfig, Configuration from .logs import Logger # INITIALization @@ -25,15 +21,15 @@ L = Logger("Central", LOGS_DIR) # API essentials API = APIConfig.load('api', 'secrets') Dir = Configuration.load('dirs') -HOST = f"{API.BIND}:{API.PORT}" +HOST = f"{API.BIND}:{API.PORT}" LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost'] SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255') MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count()) DB = Database.from_env() - -News = Configuration.load('news', 'secrets') IMG = Configuration.load('img', 'secrets') +News = Configuration.load('news', 'secrets') +Scrape = Configuration.load('scrape', 'secrets', Dir) # Directories & general paths ROUTER_DIR = BASE_DIR / "routers" @@ -98,7 +94,6 @@ SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 16384)) SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.") SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.") - # Stable diffusion IMG_DIR = DATA_DIR / "img" / "images" os.makedirs(IMG_DIR, exist_ok=True) @@ -130,7 +125,6 @@ TTS_SEGMENTS_DIR = TTS_DIR / 'segments' os.makedirs(TTS_SEGMENTS_DIR, exist_ok=True) ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY") - # Calendar & email account MS365_TOGGLE = True if os.getenv("MS365_TOGGLE") == "True" else False ICAL_TOGGLE = True if os.getenv("ICAL_TOGGLE") == "True" else False @@ -185,19 +179,17 @@ CADDY_API_KEY = os.getenv("CADDY_API_KEY") # Microsoft Graph MS365_CLIENT_ID = os.getenv('MS365_CLIENT_ID') MS365_SECRET = os.getenv('MS365_SECRET') -MS365_TENANT_ID = os.getenv('MS365_TENANT_ID') +MS365_TENANT_ID = os.getenv('MS365_TENANT_ID') MS365_CERT_PATH = CONFIG_DIR / 'MS365' / '.cert.pem' # deprecated -MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated +MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated MS365_KEY = MS365_KEY_PATH.read_text() MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt' MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT') - MS365_LOGIN_URL = os.getenv("MS365_LOGIN_URL", "https://login.microsoftonline.com") MS365_AUTHORITY_URL = f"{MS365_LOGIN_URL}/{MS365_TENANT_ID}" MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect") MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access').split(',') - # Maintenance GARBAGE_COLLECTION_INTERVAL = 60 * 60 # Run cleanup every hour -GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours \ No newline at end of file +GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours diff --git a/sijapi/__main__.py b/sijapi/__main__.py index 3f41e30..9b7e3f9 100755 --- a/sijapi/__main__.py +++ b/sijapi/__main__.py @@ -28,8 +28,18 @@ parser.add_argument('--test', type=str, help='Load only the specified module.') args = parser.parse_args() L.setup_from_args(args) -print(f"Debug modules after setup: {L.debug_modules}") # Debug print - +print(f"Debug modules after setup: {L.debug_modules}") +logger = L.get_module_logger("main") +def debug(text: str): logger.debug(text) +debug(f"Debug message.") +def info(text: str): logger.info(text) +info(f"Info message.") +def warn(text: str): logger.warning(text) +warn(f"Warning message.") +def err(text: str): logger.error(text) +err(f"Error message.") +def crit(text: str): logger.critical(text) +crit(f"Critical message.") app = FastAPI() app.add_middleware( @@ -54,13 +64,13 @@ class SimpleAPIKeyMiddleware(BaseHTTPMiddleware): if api_key_header: api_key_header = api_key_header.lower().split("bearer ")[-1] if api_key_header not in API.KEYS and api_key_query not in API.KEYS: - L.ERR(f"Invalid API key provided by a requester.") + err(f"Invalid API key provided by a requester.") return JSONResponse( status_code=401, content={"detail": "Invalid or missing API key"} ) response = await call_next(request) - # L.DEBUG(f"Request from {client_ip} is complete") + # debug(f"Request from {client_ip} is complete") return response # Add the middleware to your FastAPI app @@ -68,8 +78,8 @@ app.add_middleware(SimpleAPIKeyMiddleware) @app.exception_handler(HTTPException) async def http_exception_handler(request: Request, exc: HTTPException): - L.ERR(f"HTTP Exception: {exc.status_code} - {exc.detail}") - L.ERR(f"Request: {request.method} {request.url}") + err(f"HTTP Exception: {exc.status_code} - {exc.detail}") + err(f"Request: {request.method} {request.url}") return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) @@ -106,8 +116,8 @@ def main(argv): if args.test: load_router(args.test) else: - L.logger.critical(f"sijapi launched") - L.logger.critical(f"Arguments: {args}") + crit(f"sijapi launched") + crit(f"Arguments: {args}") for module_name in API.MODULES.__fields__: if getattr(API.MODULES, module_name): load_router(module_name) diff --git a/sijapi/classes.py b/sijapi/classes.py index e634a45..1958965 100644 --- a/sijapi/classes.py +++ b/sijapi/classes.py @@ -1,36 +1,27 @@ # classes.py import asyncio import json +import yaml import math import os import re -from concurrent.futures import ThreadPoolExecutor -from contextlib import asynccontextmanager -from datetime import datetime, timedelta, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union, TypeVar, Type -from zoneinfo import ZoneInfo import aiofiles import aiohttp import asyncpg import reverse_geocoder as rg -import yaml +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union, TypeVar from dotenv import load_dotenv from pydantic import BaseModel, Field, create_model -from srtm import get_data +from concurrent.futures import ThreadPoolExecutor +from contextlib import asynccontextmanager +from datetime import datetime, timedelta, timezone from timezonefinder import TimezoneFinder +from zoneinfo import ZoneInfo +from srtm import get_data T = TypeVar('T', bound='Configuration') - -import os -from pathlib import Path -from typing import Union, Optional, Any, Dict, List -import yaml -import re -from pydantic import BaseModel, create_model -from dotenv import load_dotenv - class Configuration(BaseModel): HOME: Path = Path.home() _dir_config: Optional['Configuration'] = None @@ -40,38 +31,50 @@ class Configuration(BaseModel): yaml_path = cls._resolve_path(yaml_path, 'config') if secrets_path: secrets_path = cls._resolve_path(secrets_path, 'config') - + try: with yaml_path.open('r') as file: config_data = yaml.safe_load(file) - + print(f"Loaded configuration data from {yaml_path}") - + if secrets_path: with secrets_path.open('r') as file: secrets_data = yaml.safe_load(file) print(f"Loaded secrets data from {secrets_path}") - config_data.update(secrets_data) - + + # If config_data is a list, apply secrets to each item + if isinstance(config_data, list): + for item in config_data: + if isinstance(item, dict): + item.update(secrets_data) + else: + config_data.update(secrets_data) + + # If config_data is a list, create a dict with a single key + if isinstance(config_data, list): + config_data = {"configurations": config_data} + # Ensure HOME is set if config_data.get('HOME') is None: config_data['HOME'] = str(Path.home()) print(f"HOME was None in config, set to default: {config_data['HOME']}") - + load_dotenv() - + instance = cls.create_dynamic_model(**config_data) instance._dir_config = dir_config or instance - + resolved_data = instance.resolve_placeholders(config_data) instance = cls.create_dynamic_model(**resolved_data) instance._dir_config = dir_config or instance - + return instance except Exception as e: print(f"Error loading configuration: {str(e)}") raise + @classmethod def _resolve_path(cls, path: Union[str, Path], default_dir: str) -> Path: base_path = Path(__file__).parent.parent # This will be two levels up from this file @@ -92,28 +95,50 @@ class Configuration(BaseModel): else: return data + def resolve_placeholders(self, data: Any) -> Any: + if isinstance(data, dict): + resolved_data = {k: self.resolve_placeholders(v) for k, v in data.items()} + + # Special handling for directory placeholders + home = Path(resolved_data.get('HOME', self.HOME)).expanduser() + sijapi = home / "workshop" / "sijapi" + data_dir = sijapi / "data" + + resolved_data['HOME'] = str(home) + resolved_data['SIJAPI'] = str(sijapi) + resolved_data['DATA'] = str(data_dir) + + return resolved_data + elif isinstance(data, list): + return [self.resolve_placeholders(v) for v in data] + elif isinstance(data, str): + return self.resolve_string_placeholders(data) + else: + return data + def resolve_string_placeholders(self, value: str) -> Any: pattern = r'\{\{\s*([^}]+)\s*\}\}' matches = re.findall(pattern, value) - + for match in matches: parts = match.split('.') if len(parts) == 1: # Internal reference - replacement = getattr(self._dir_config, parts[0], str(Path.home() / parts[0].lower())) + replacement = getattr(self, parts[0], str(Path.home() / parts[0].lower())) elif len(parts) == 2 and parts[0] == 'Dir': - replacement = getattr(self._dir_config, parts[1], str(Path.home() / parts[1].lower())) + replacement = getattr(self, parts[1], str(Path.home() / parts[1].lower())) elif len(parts) == 2 and parts[0] == 'ENV': replacement = os.getenv(parts[1], '') else: replacement = value # Keep original if not recognized - + value = value.replace('{{' + match + '}}', str(replacement)) - + # Convert to Path if it looks like a file path if isinstance(value, str) and (value.startswith(('/', '~')) or (':' in value and value[1] == ':')): return Path(value).expanduser() return value + @classmethod def create_dynamic_model(cls, **data): for key, value in data.items(): @@ -121,7 +146,7 @@ class Configuration(BaseModel): data[key] = cls.create_dynamic_model(**value) elif isinstance(value, list) and all(isinstance(item, dict) for item in value): data[key] = [cls.create_dynamic_model(**item) for item in value] - + DynamicModel = create_model( f'Dynamic{cls.__name__}', __base__=cls, @@ -133,7 +158,11 @@ class Configuration(BaseModel): extra = "allow" arbitrary_types_allowed = True - +from pydantic import BaseModel, create_model +from typing import Any, Dict, List, Union +from pathlib import Path +import yaml +import re class APIConfig(BaseModel): HOST: str @@ -143,8 +172,10 @@ class APIConfig(BaseModel): PUBLIC: List[str] TRUSTED_SUBNETS: List[str] MODULES: Any # This will be replaced with a dynamic model + EXTENSIONS: Any # This will be replaced with a dynamic model TZ: str KEYS: List[str] + GARBAGE: Dict[str, Any] @classmethod def load(cls, config_path: Union[str, Path], secrets_path: Union[str, Path]): @@ -154,9 +185,9 @@ class APIConfig(BaseModel): # Load main configuration with open(config_path, 'r') as file: config_data = yaml.safe_load(file) - + print(f"Loaded main config: {config_data}") # Debug print - + # Load secrets try: with open(secrets_path, 'r') as file: @@ -168,12 +199,12 @@ class APIConfig(BaseModel): except yaml.YAMLError as e: print(f"Error parsing secrets YAML: {e}") secrets_data = {} - + # Resolve internal placeholders config_data = cls.resolve_placeholders(config_data) - + print(f"Resolved config: {config_data}") # Debug print - + # Handle KEYS placeholder if isinstance(config_data.get('KEYS'), list) and len(config_data['KEYS']) == 1: placeholder = config_data['KEYS'][0] @@ -189,23 +220,29 @@ class APIConfig(BaseModel): print(f"Secret key '{secret_key}' not found in secrets file") else: print(f"Invalid secret placeholder format: {placeholder}") - + # Create dynamic ModulesConfig - modules_data = config_data.get('MODULES', {}) - modules_fields = {} - for key, value in modules_data.items(): - if isinstance(value, str): - modules_fields[key] = (bool, value.lower() == 'on') - elif isinstance(value, bool): - modules_fields[key] = (bool, value) - else: - raise ValueError(f"Invalid value for module {key}: {value}. Must be 'on', 'off', True, or False.") - - DynamicModulesConfig = create_model('DynamicModulesConfig', **modules_fields) - config_data['MODULES'] = DynamicModulesConfig(**modules_data) - + config_data['MODULES'] = cls._create_dynamic_config(config_data.get('MODULES', {}), 'DynamicModulesConfig') + + # Create dynamic ExtensionsConfig + config_data['EXTENSIONS'] = cls._create_dynamic_config(config_data.get('EXTENSIONS', {}), 'DynamicExtensionsConfig') + return cls(**config_data) + @classmethod + def _create_dynamic_config(cls, data: Dict[str, Any], model_name: str): + fields = {} + for key, value in data.items(): + if isinstance(value, str): + fields[key] = (bool, value.lower() == 'on') + elif isinstance(value, bool): + fields[key] = (bool, value) + else: + raise ValueError(f"Invalid value for {key}: {value}. Must be 'on', 'off', True, or False.") + + DynamicConfig = create_model(model_name, **fields) + return DynamicConfig(**data) + @classmethod def _resolve_path(cls, path: Union[str, Path], default_dir: str) -> Path: base_path = Path(__file__).parent.parent # This will be two levels up from this file @@ -235,23 +272,26 @@ class APIConfig(BaseModel): resolved_data[key] = [resolve_value(item) for item in value] else: resolved_data[key] = resolve_value(value) - + # Resolve BIND separately to ensure HOST and PORT are used if 'BIND' in resolved_data: resolved_data['BIND'] = resolved_data['BIND'].replace('{{ HOST }}', str(resolved_data['HOST'])) resolved_data['BIND'] = resolved_data['BIND'].replace('{{ PORT }}', str(resolved_data['PORT'])) - + return resolved_data def __getattr__(self, name: str) -> Any: - if name == 'MODULES': - return self.__dict__['MODULES'] + if name in ['MODULES', 'EXTENSIONS']: + return self.__dict__[name] return super().__getattr__(name) @property def active_modules(self) -> List[str]: return [module for module, is_active in self.MODULES.__dict__.items() if is_active] + @property + def active_extensions(self) -> List[str]: + return [extension for extension, is_active in self.EXTENSIONS.__dict__.items() if is_active] class Location(BaseModel): @@ -265,7 +305,7 @@ class Location(BaseModel): city: Optional[str] = None state: Optional[str] = None country: Optional[str] = None - context: Optional[Dict[str, Any]] = None + context: Optional[Dict[str, Any]] = None class_: Optional[str] = None type: Optional[str] = None name: Optional[str] = None @@ -286,6 +326,8 @@ class Location(BaseModel): } + + class Geocoder: def __init__(self, named_locs: Union[str, Path] = None, cache_file: Union[str, Path] = 'timezone_cache.json'): self.tf = TimezoneFinder() @@ -319,20 +361,20 @@ class Geocoder: def find_override_location(self, lat: float, lon: float) -> Optional[str]: closest_location = None closest_distance = float('inf') - + for location in self.override_locations: loc_name = location.get("name") loc_lat = location.get("latitude") loc_lon = location.get("longitude") loc_radius = location.get("radius") - + distance = self.haversine(lat, lon, loc_lat, loc_lon) - + if distance <= loc_radius: if distance < closest_distance: closest_distance = distance closest_location = loc_name - + return closest_location async def location(self, lat: float, lon: float): @@ -346,7 +388,7 @@ class Geocoder: async def elevation(self, latitude: float, longitude: float, unit: str = "m") -> float: loop = asyncio.get_running_loop() elevation = await loop.run_in_executor(self.executor, self.srtm_data.get_elevation, latitude, longitude) - + if unit == "m": return elevation elif unit == "km": @@ -362,12 +404,12 @@ class Geocoder: return ZoneInfo(timezone_str) if timezone_str else None - + async def lookup(self, lat: float, lon: float): city, state, country = (await self.location(lat, lon))[0]['name'], (await self.location(lat, lon))[0]['admin1'], (await self.location(lat, lon))[0]['cc'] elevation = await self.elevation(lat, lon) timezone = await self.timezone(lat, lon) - + return { "city": city, "state": state, @@ -379,12 +421,12 @@ class Geocoder: async def code(self, locations: Union[Location, Tuple[float, float], List[Union[Location, Tuple[float, float]]]]) -> Union[Location, List[Location]]: if isinstance(locations, (Location, tuple)): locations = [locations] - + processed_locations = [] for loc in locations: if isinstance(loc, tuple): processed_locations.append(Location( - latitude=loc[0], + latitude=loc[0], longitude=loc[1], datetime=datetime.now(timezone.utc) )) @@ -396,12 +438,11 @@ class Geocoder: raise ValueError(f"Unsupported location type: {type(loc)}") coordinates = [(location.latitude, location.longitude) for location in processed_locations] - + geocode_results = await asyncio.gather(*[self.location(lat, lon) for lat, lon in coordinates]) elevations = await asyncio.gather(*[self.elevation(lat, lon) for lat, lon in coordinates]) timezone_results = await asyncio.gather(*[self.timezone(lat, lon) for lat, lon in coordinates]) - def create_display_name(override_name, result): parts = [] if override_name: @@ -446,13 +487,13 @@ class Geocoder: async def geocode_osm(self, latitude: float, longitude: float, email: str): url = f"https://nominatim.openstreetmap.org/reverse?format=json&lat={latitude}&lon={longitude}" headers = { - 'User-Agent': f'sijapi/1.0 ({email})', # replace with your app name and email + 'User-Agent': f'sijapi/1.0 ({email})', } async with aiohttp.ClientSession() as session: async with session.get(url, headers=headers) as response: response.raise_for_status() data = await response.json() - + address = data.get("address", {}) elevation = await self.elevation(latitude, longitude) return Location( @@ -465,7 +506,7 @@ class Geocoder: city=address.get("city"), state=address.get("state"), country=address.get("country"), - context={}, + context={}, class_=data.get("class"), type=data.get("type"), name=data.get("name"), @@ -481,7 +522,6 @@ class Geocoder: timezone=await self.timezone(latitude, longitude) ) - def round_coords(self, lat: float, lon: float, decimal_places: int = 2) -> Tuple[float, float]: return (round(lat, decimal_places), round(lon, decimal_places)) @@ -501,17 +541,17 @@ class Geocoder: not self.last_update or current_time - self.last_update > timedelta(hours=1) or not self.coords_equal(rounded_location, self.round_coords(*self.last_location) if self.last_location else (None, None))): - - + + new_timezone = await self.timezone(lat, lon) self.last_timezone = new_timezone self.last_update = current_time self.last_location = (lat, lon) # Store the original, non-rounded coordinates await self.tz_save() - + return self.last_timezone - + async def tz_save(self): cache_data = { 'last_timezone': str(self.last_timezone) if self.last_timezone else None, @@ -528,7 +568,7 @@ class Geocoder: self.last_timezone = ZoneInfo(cache_data['last_timezone']) if cache_data.get('last_timezone') else None self.last_update = datetime.fromisoformat(cache_data['last_update']) if cache_data.get('last_update') else None self.last_location = tuple(cache_data['last_location']) if cache_data.get('last_location') else None - + except (FileNotFoundError, json.JSONDecodeError): # If file doesn't exist or is invalid, we'll start fresh self.last_timezone = None @@ -546,7 +586,7 @@ class Geocoder: async def tz_at(self, lat: float, lon: float) -> Optional[ZoneInfo]: """ Get the timezone at a specific latitude and longitude without affecting the cache. - + :param lat: Latitude :param lon: Longitude :return: ZoneInfo object representing the timezone @@ -556,7 +596,6 @@ class Geocoder: def __del__(self): self.executor.shutdown() - class Database(BaseModel): host: str = Field(..., description="Database host") port: int = Field(5432, description="Database port") @@ -596,7 +635,6 @@ class Database(BaseModel): def to_dict(self): return self.dict(exclude_none=True) - class IMAPConfig(BaseModel): username: str password: str @@ -621,7 +659,7 @@ class AutoResponder(BaseModel): image_prompt: Optional[str] = None image_scene: Optional[str] = None smtp: SMTPConfig - + class EmailAccount(BaseModel): name: str refresh: int @@ -643,3 +681,12 @@ class IncomingEmail(BaseModel): subject: str body: str attachments: List[dict] = [] + +class WidgetUpdate(BaseModel): + text: Optional[str] = None + progress: Optional[str] = None + icon: Optional[str] = None + color: Optional[str] = None + url: Optional[str] = None + shortcut: Optional[str] = None + graph: Optional[str] = None \ No newline at end of file diff --git a/sijapi/data/tzcache.json b/sijapi/data/tzcache.json index 5e08058..24a42b5 100644 --- a/sijapi/data/tzcache.json +++ b/sijapi/data/tzcache.json @@ -1 +1 @@ -{"last_timezone": "America/Los_Angeles", "last_update": "2024-06-29T09:36:32.143487", "last_location": [44.04645364336354, -123.08688060439617]} \ No newline at end of file +{"last_timezone": "America/Los_Angeles", "last_update": "2024-07-22T12:00:14.193328", "last_location": [42.80982885281664, -123.0494316777397]} \ No newline at end of file diff --git a/sijapi/helpers/CaPLSS_downloader_and_importer.py b/sijapi/helpers/CaPLSS_downloader_and_importer.py new file mode 100644 index 0000000..caebdf0 --- /dev/null +++ b/sijapi/helpers/CaPLSS_downloader_and_importer.py @@ -0,0 +1,117 @@ +import requests +import json +import time +import os +import subprocess + +def get_feature_count(url): + params = { + 'where': '1=1', + 'returnCountOnly': 'true', + 'f': 'json' + } + response = requests.get(url, params=params) + response.raise_for_status() + data = response.json() + return data.get('count', 0) + +def fetch_features(url, offset, num): + params = { + 'where': '1=1', + 'outFields': '*', + 'geometryPrecision': 6, + 'outSR': 4326, + 'f': 'json', + 'resultOffset': offset, + 'resultRecordCount': num + } + response = requests.get(url, params=params) + response.raise_for_status() + return response.json() + +def download_layer(layer_num, layer_name): + url = f"https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/{layer_num}/query" + + total_count = get_feature_count(url) + print(f"Total {layer_name} features: {total_count}") + + batch_size = 1000 + offset = 0 + all_features = [] + + while offset < total_count: + print(f"Fetching {layer_name} features {offset} to {offset + batch_size}...") + data = fetch_features(url, offset, batch_size) + + new_features = data.get('features', []) + if not new_features: + break + + all_features.extend(new_features) + offset += len(new_features) + + print(f"Progress: {len(all_features)}/{total_count} features") + + time.sleep(1) # Be nice to the server + + print(f"Total {layer_name} features fetched: {len(all_features)}") + + # Convert to GeoJSON + geojson_features = [ + { + "type": "Feature", + "properties": feature['attributes'], + "geometry": feature['geometry'] + } for feature in all_features + ] + + full_geojson = { + "type": "FeatureCollection", + "features": geojson_features + } + + # Save to file + file_path = f'/Users/sij/workshop/sijapi/sijapi/data/PLSS_{layer_name}.geojson' + with open(file_path, 'w') as f: + json.dump(full_geojson, f) + + print(f"GeoJSON file saved as '{file_path}'") + + return file_path + +def import_to_postgis(file_path, table_name): + db_name = 'sij' + db_user = 'sij' + db_password = 'Synchr0!' + + ogr2ogr_command = [ + 'ogr2ogr', + '-f', 'PostgreSQL', + f'PG:dbname={db_name} user={db_user} password={db_password}', + file_path, + '-nln', table_name, + '-overwrite' + ] + + subprocess.run(ogr2ogr_command, check=True) + print(f"Data successfully imported into PostGIS table: {table_name}") + +def main(): + try: + # Download and import Townships (Layer 1) + township_file = download_layer(1, "Townships") + import_to_postgis(township_file, "public.plss_townships") + + # Download and import Sections (Layer 2) + section_file = download_layer(2, "Sections") + import_to_postgis(section_file, "public.plss_sections") + + except requests.exceptions.RequestException as e: + print(f"Error fetching data: {e}") + except subprocess.CalledProcessError as e: + print(f"Error importing data into PostGIS: {e}") + except Exception as e: + print(f"An unexpected error occurred: {e}") + +if __name__ == "__main__": + main() diff --git a/sijapi/helpers/CalFire_THP_scraper.py b/sijapi/helpers/CalFire_THP_scraper.py new file mode 100644 index 0000000..92909a6 --- /dev/null +++ b/sijapi/helpers/CalFire_THP_scraper.py @@ -0,0 +1,73 @@ +import requests +import PyPDF2 +import io +import re + +def scrape_data_from_pdf(url): + response = requests.get(url) + pdf_file = io.BytesIO(response.content) + + pdf_reader = PyPDF2.PdfReader(pdf_file) + + all_text = "" + for page in pdf_reader.pages: + all_text += page.extract_text() + "\n" + + return all_text + +def parse_data(raw_data): + lines = raw_data.split('\n') + data = [] + current_entry = None + + for line in lines: + line = line.strip() + if re.match(r'\d+-\d+-\d+-\w+', line): + if current_entry: + data.append(current_entry) + current_entry = {'Harvest Document': line, 'Raw Data': []} + elif current_entry: + current_entry['Raw Data'].append(line) + + if current_entry: + data.append(current_entry) + + return data + +def filter_data(data): + return [entry for entry in data if any(owner.lower() in ' '.join(entry['Raw Data']).lower() for owner in ["Sierra Pacific", "SPI", "Land & Timber"])] + +def extract_location(raw_data): + location = [] + for line in raw_data: + if 'MDBM:' in line or 'HBM:' in line: + location.append(line) + return ' '.join(location) + +def extract_plss_coordinates(text): + pattern = r'(\w+): T(\d+)([NSEW]) R(\d+)([NSEW]) S(\d+)' + return re.findall(pattern, text) + +# Main execution +url = "https://caltreesplans.resources.ca.gov/Caltrees/Report/ShowReport.aspx?module=TH_Document&reportID=492&reportType=LINK_REPORT_LIST" +raw_data = scrape_data_from_pdf(url) + +parsed_data = parse_data(raw_data) +print(f"Total timber plans parsed: {len(parsed_data)}") + +filtered_data = filter_data(parsed_data) +print(f"Found {len(filtered_data)} matching entries.") + +for plan in filtered_data: + print("\nHarvest Document:", plan['Harvest Document']) + + location = extract_location(plan['Raw Data']) + print("Location:", location) + + plss_coordinates = extract_plss_coordinates(location) + print("PLSS Coordinates:") + for coord in plss_coordinates: + meridian, township, township_dir, range_, range_dir, section = coord + print(f" {meridian}: T{township}{township_dir} R{range_}{range_dir} S{section}") + + print("-" * 50) diff --git a/sijapi/helpers/article.py b/sijapi/helpers/article.py new file mode 100755 index 0000000..c76a6e1 --- /dev/null +++ b/sijapi/helpers/article.py @@ -0,0 +1,23 @@ +#!/Users/sij/miniforge3/envs/sijapi/bin/python +import sys +import asyncio +from fastapi import BackgroundTasks +from sijapi.routers.news import process_and_save_article + +async def main(): + if len(sys.argv) != 2: + print("Usage: python script.py ") + sys.exit(1) + + url = sys.argv[1] + bg_tasks = BackgroundTasks() + + try: + result = await process_and_save_article(bg_tasks, url) + print(result) + except Exception as e: + print(f"Error processing article: {str(e)}") + sys.exit(1) + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/sijapi/helpers/cli.py b/sijapi/helpers/cli.py new file mode 100644 index 0000000..209d546 --- /dev/null +++ b/sijapi/helpers/cli.py @@ -0,0 +1,57 @@ +# cli.py +import click +import asyncio +from datetime import datetime as dt_datetime, timedelta + +# Import your async functions and dependencies +from sijapi import build_daily_note_range_endpoint, gis # broken! + +def async_command(f): + @click.command() + @click.pass_context + def wrapper(ctx, *args, **kwargs): + async def run(): + return await f(*args, **kwargs) + return asyncio.run(run()) + return wrapper + +@click.group() +def cli(): + """CLI for your application.""" + pass + +@cli.command() +@click.argument('dt_start') +@click.argument('dt_end') +@async_command +async def bulk_note_range(dt_start: str, dt_end: str): + """ + Build daily notes for a date range. + + DT_START and DT_END should be in YYYY-MM-DD format. + """ + try: + start_date = dt_datetime.strptime(dt_start, "%Y-%m-%d") + end_date = dt_datetime.strptime(dt_end, "%Y-%m-%d") + except ValueError: + click.echo("Error: Dates must be in YYYY-MM-DD format.") + return + + if start_date > end_date: + click.echo("Error: Start date must be before or equal to end date.") + return + + results = [] + current_date = start_date + while current_date <= end_date: + formatted_date = await gis.dt(current_date) + result = await build_daily_note(formatted_date) + results.append(result) + current_date += timedelta(days=1) + + click.echo("Generated notes for the following dates:") + for url in results: + click.echo(url) + +if __name__ == '__main__': + cli() \ No newline at end of file diff --git a/sijapi/helpers/log_prior_emails.py b/sijapi/helpers/log_prior_emails.py index 113e480..10148cf 100644 --- a/sijapi/helpers/log_prior_emails.py +++ b/sijapi/helpers/log_prior_emails.py @@ -43,7 +43,7 @@ async def process_all_emails(account: EmailAccount, summarized_log: Path, autore L.INFO(f"Processed {processed_count} non-unread emails for account {account.name}") except Exception as e: - L.ERR(f"An error occurred while processing emails for account {account.name}: {e}") + L.logger.error(f"An error occurred while processing emails for account {account.name}: {e}") async def main(): email_accounts = email.load_email_accounts(EMAIL_CONFIG) diff --git a/sijapi/routers/asr.py b/sijapi/routers/asr.py index ea4d23c..1981825 100644 --- a/sijapi/routers/asr.py +++ b/sijapi/routers/asr.py @@ -18,6 +18,11 @@ from sijapi import L, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, asr = APIRouter() logger = L.get_module_logger("asr") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) class TranscribeParams(BaseModel): model: str = Field(default="small") @@ -81,7 +86,7 @@ async def transcribe_endpoint( return JSONResponse(content={"status": "timeout", "message": "Transcription is taking longer than expected. Please check back later."}, status_code=202) async def transcribe_audio(file_path, params: TranscribeParams): - logger.debug(f"Transcribing audio file from {file_path}...") + debug(f"Transcribing audio file from {file_path}...") file_path = await convert_to_wav(file_path) model = params.model if params.model in WHISPER_CPP_MODELS else 'small' model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin' @@ -119,11 +124,11 @@ async def transcribe_audio(file_path, params: TranscribeParams): command.extend(['--dtw', params.dtw]) command.extend(['-f', file_path]) - logger.debug(f"Command: {command}") + debug(f"Command: {command}") # Create a unique ID for this transcription job job_id = str(uuid.uuid4()) - logger.debug(f"Created job ID: {job_id}") + debug(f"Created job ID: {job_id}") # Store the job status transcription_results[job_id] = {"status": "processing", "result": None} @@ -135,20 +140,20 @@ async def transcribe_audio(file_path, params: TranscribeParams): poll_interval = 1 # 1 second start_time = asyncio.get_event_loop().time() - logger.debug(f"Starting to poll for job {job_id}") + debug(f"Starting to poll for job {job_id}") try: while asyncio.get_event_loop().time() - start_time < max_wait_time: job_status = transcription_results.get(job_id, {}) - logger.debug(f"Current status for job {job_id}: {job_status['status']}") + debug(f"Current status for job {job_id}: {job_status['status']}") if job_status["status"] == "completed": - logger.info(f"Transcription completed for job {job_id}") + info(f"Transcription completed for job {job_id}") return job_status["result"] elif job_status["status"] == "failed": - logger.error(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}") + err(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}") raise Exception(f"Transcription failed: {job_status.get('error', 'Unknown error')}") await asyncio.sleep(poll_interval) - logger.error(f"Transcription timed out for job {job_id}") + err(f"Transcription timed out for job {job_id}") raise TimeoutError("Transcription timed out") finally: # Ensure the task is cancelled if we exit the loop @@ -159,20 +164,20 @@ async def transcribe_audio(file_path, params: TranscribeParams): async def process_transcription(command, file_path, job_id): try: - logger.debug(f"Starting transcription process for job {job_id}") + debug(f"Starting transcription process for job {job_id}") result = await run_transcription(command, file_path) transcription_results[job_id] = {"status": "completed", "result": result} - logger.debug(f"Transcription completed for job {job_id}") + debug(f"Transcription completed for job {job_id}") except Exception as e: - logger.error(f"Transcription failed for job {job_id}: {str(e)}") + err(f"Transcription failed for job {job_id}: {str(e)}") transcription_results[job_id] = {"status": "failed", "error": str(e)} finally: # Clean up the temporary file os.remove(file_path) - logger.debug(f"Cleaned up temporary file for job {job_id}") + debug(f"Cleaned up temporary file for job {job_id}") async def run_transcription(command, file_path): - logger.debug(f"Running transcription command: {' '.join(command)}") + debug(f"Running transcription command: {' '.join(command)}") proc = await asyncio.create_subprocess_exec( *command, stdout=asyncio.subprocess.PIPE, @@ -181,9 +186,9 @@ async def run_transcription(command, file_path): stdout, stderr = await proc.communicate() if proc.returncode != 0: error_message = f"Error running command: {stderr.decode()}" - logger.error(error_message) + err(error_message) raise Exception(error_message) - logger.debug("Transcription command completed successfully") + debug("Transcription command completed successfully") return stdout.decode().strip() async def convert_to_wav(file_path: str): diff --git a/sijapi/routers/cal.py b/sijapi/routers/cal.py index f1a3399..a4e83b5 100644 --- a/sijapi/routers/cal.py +++ b/sijapi/routers/cal.py @@ -14,42 +14,46 @@ from dateutil.parser import isoparse as parse_iso import threading from typing import Dict, List, Any from datetime import datetime, timedelta -from Foundation import NSDate, NSRunLoop -import EventKit as EK + from sijapi import L, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH -from sijapi.routers import loc +from sijapi.routers import gis cal = APIRouter() oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token") timeout = httpx.Timeout(12) logger = L.get_module_logger("cal") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) if MS365_TOGGLE is True: - logger.critical(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.") + crit(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.") @cal.get("/o365/login") async def login(): - logger.debug(f"Received request to /o365/login") - logger.debug(f"SCOPE: {MS365_SCOPE}") + debug(f"Received request to /o365/login") + debug(f"SCOPE: {MS365_SCOPE}") if not MS365_SCOPE: - logger.error("No scopes defined for authorization.") + err("No scopes defined for authorization.") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="No scopes defined for authorization." ) authorization_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize?client_id={MS365_CLIENT_ID}&response_type=code&redirect_uri={MS365_REDIRECT_PATH}&scope={'+'.join(MS365_SCOPE)}" - logger.info(f"Redirecting to authorization URL: {authorization_url}") + info(f"Redirecting to authorization URL: {authorization_url}") return RedirectResponse(authorization_url) @cal.get("/o365/oauth_redirect") async def oauth_redirect(code: str = None, error: str = None): - logger.debug(f"Received request to /o365/oauth_redirect") + debug(f"Received request to /o365/oauth_redirect") if error: - logger.error(f"OAuth2 Error: {error}") + err(f"OAuth2 Error: {error}") raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error" ) - logger.info(f"Requesting token with authorization code: {code}") + info(f"Requesting token with authorization code: {code}") token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token" data = { "client_id": MS365_CLIENT_ID, @@ -60,15 +64,15 @@ if MS365_TOGGLE is True: } async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post(token_url, data=data) - logger.debug(f"Token endpoint response status code: {response.status_code}") - logger.info(f"Token endpoint response text: {response.text}") + debug(f"Token endpoint response status code: {response.status_code}") + info(f"Token endpoint response text: {response.text}") result = response.json() if 'access_token' in result: await save_token(result) - logger.info("Access token obtained successfully") + info("Access token obtained successfully") return {"message": "Access token stored successfully"} else: - logger.critical(f"Failed to obtain access token. Response: {result}") + crit(f"Failed to obtain access token. Response: {result}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to obtain access token" @@ -76,7 +80,7 @@ if MS365_TOGGLE is True: @cal.get("/o365/me") async def read_items(): - logger.debug(f"Received request to /o365/me") + debug(f"Received request to /o365/me") token = await load_token() if not token: raise HTTPException( @@ -89,10 +93,10 @@ if MS365_TOGGLE is True: response = await client.get(graph_url, headers=headers) if response.status_code == 200: user = response.json() - logger.info(f"User retrieved: {user}") + info(f"User retrieved: {user}") return user else: - logger.error("Invalid or expired token") + err("Invalid or expired token") raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or expired token", @@ -100,14 +104,14 @@ if MS365_TOGGLE is True: ) async def save_token(token): - logger.debug(f"Saving token: {token}") + debug(f"Saving token: {token}") try: token["expires_at"] = int(time.time()) + token["expires_in"] with open(MS365_TOKEN_PATH, "w") as file: json.dump(token, file) - logger.debug(f"Saved token to {MS365_TOKEN_PATH}") + debug(f"Saved token to {MS365_TOKEN_PATH}") except Exception as e: - logger.error(f"Failed to save token: {e}") + err(f"Failed to save token: {e}") async def load_token(): if os.path.exists(MS365_TOKEN_PATH): @@ -115,21 +119,21 @@ if MS365_TOGGLE is True: with open(MS365_TOKEN_PATH, "r") as file: token = json.load(file) except FileNotFoundError: - logger.error("Token file not found.") + err("Token file not found.") return None except json.JSONDecodeError: - logger.error("Failed to decode token JSON") + err("Failed to decode token JSON") return None if token: token["expires_at"] = int(time.time()) + token["expires_in"] - logger.debug(f"Loaded token: {token}") # Add this line to log the loaded token + debug(f"Loaded token: {token}") # Add this line to log the loaded token return token else: - logger.debug("No token found.") + debug("No token found.") return None else: - logger.error(f"No file found at {MS365_TOKEN_PATH}") + err(f"No file found at {MS365_TOKEN_PATH}") return None @@ -159,64 +163,137 @@ if MS365_TOGGLE is True: response = await client.post(token_url, data=data) result = response.json() if "access_token" in result: - logger.info("Access token refreshed successfully") + info("Access token refreshed successfully") return result else: - logger.error("Failed to refresh access token") + err("Failed to refresh access token") return None async def refresh_token(): token = await load_token() if not token: - logger.error("No token found in storage") + err("No token found in storage") raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail="No token found", ) if 'refresh_token' not in token: - logger.error("Refresh token not found in the loaded token") + err("Refresh token not found in the loaded token") raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail="Refresh token not found", ) refresh_token = token['refresh_token'] - logger.debug("Found refresh token, attempting to refresh access token") + debug("Found refresh token, attempting to refresh access token") new_token = await get_new_token_with_refresh_token(refresh_token) if new_token: await save_token(new_token) - logger.info("Token refreshed and saved successfully") + info("Token refreshed and saved successfully") else: - logger.error("Failed to refresh token") + err("Failed to refresh token") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to refresh token", ) +if ICAL_TOGGLE is True: + from Foundation import NSDate, NSRunLoop + import EventKit as EK -def get_calendar_ids() -> Dict[str, str]: - event_store = EK.EKEventStore.alloc().init() - all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent + # Helper to convert datetime to NSDate + def datetime_to_nsdate(dt: datetime) -> NSDate: + return NSDate.dateWithTimeIntervalSince1970_(dt.timestamp()) - calendar_identifiers = { - calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars - } - logger.debug(f"{calendar_identifiers}") - return calendar_identifiers + def get_calendar_ids() -> Dict[str, str]: + event_store = EK.EKEventStore.alloc().init() + all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent -# Helper to convert datetime to NSDate -def datetime_to_nsdate(dt: datetime) -> NSDate: - return NSDate.dateWithTimeIntervalSince1970_(dt.timestamp()) + calendar_identifiers = { + calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars + } + debug(f"{calendar_identifiers}") + return calendar_identifiers + def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]: + event_store = EK.EKEventStore.alloc().init() + + # Request access to EventKit + def request_access() -> bool: + access_granted = [] + + def completion_handler(granted, error): + if error is not None: + err(f"Error: {error}") + access_granted.append(granted) + with access_granted_condition: + access_granted_condition.notify() + + access_granted_condition = threading.Condition() + with access_granted_condition: + event_store.requestAccessToEntityType_completion_(0, completion_handler) # 0 corresponds to EKEntityTypeEvent + access_granted_condition.wait(timeout=10) + if access_granted: + return access_granted[0] + else: + err("Request access timed out or failed") + return False + + if not request_access(): + err("Access to calendar data was not granted") + return [] + + ns_start_date = datetime_to_nsdate(start_date) + ns_end_date = datetime_to_nsdate(end_date) + + # Retrieve all calendars + all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent + if calendar_ids: + selected_calendars = [cal for cal in all_calendars if cal.calendarIdentifier() in calendar_ids] + else: + selected_calendars = all_calendars + + # Filtering events by selected calendars + predicate = event_store.predicateForEventsWithStartDate_endDate_calendars_(ns_start_date, ns_end_date, selected_calendars) + events = event_store.eventsMatchingPredicate_(predicate) + + event_list = [] + for event in events: + # Check if event.attendees() returns None + if event.attendees(): + attendees = [{'name': att.name(), 'email': att.emailAddress()} for att in event.attendees() if att.emailAddress()] + else: + attendees = [] + + # Format the start and end dates properly + start_date_str = event.startDate().descriptionWithLocale_(None) + end_date_str = event.endDate().descriptionWithLocale_(None) + + event_data = { + "subject": event.title(), + "id": event.eventIdentifier(), + "start": start_date_str, + "end": end_date_str, + "bodyPreview": event.notes() if event.notes() else '', + "attendees": attendees, + "location": event.location() if event.location() else '', + "onlineMeetingUrl": '', # Defaulting to empty as macOS EventKit does not provide this + "showAs": 'busy', # Default to 'busy' + "isAllDay": event.isAllDay() + } + + event_list.append(event_data) + + return event_list @cal.get("/events") async def get_events_endpoint(start_date: str, end_date: str): - start_dt = await loc.dt(start_date) - end_dt = await loc.dt(end_date) + start_dt = await gis.dt(start_date) + end_dt = await gis.dt(end_date) datetime.strptime(start_date, "%Y-%m-%d") or datetime.now() end_dt = datetime.strptime(end_date, "%Y-%m-%d") or datetime.now() response = await get_events(start_dt, end_dt) @@ -237,80 +314,6 @@ async def get_events(start_dt: datetime, end_dt: datetime) -> List: return parsed_events -def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]: - event_store = EK.EKEventStore.alloc().init() - - # Request access to EventKit - def request_access() -> bool: - access_granted = [] - - def completion_handler(granted, error): - if error is not None: - logger.error(f"Error: {error}") - access_granted.append(granted) - # Notify the main thread that the completion handler has executed - with access_granted_condition: - access_granted_condition.notify() - - access_granted_condition = threading.Condition() - with access_granted_condition: - event_store.requestAccessToEntityType_completion_(0, completion_handler) # 0 corresponds to EKEntityTypeEvent - # Wait for the completion handler to be called - access_granted_condition.wait(timeout=10) - # Verify that the handler was called and access_granted is not empty - if access_granted: - return access_granted[0] - else: - logger.error("Request access timed out or failed") - return False - - if not request_access(): - logger.error("Access to calendar data was not granted") - return [] - - ns_start_date = datetime_to_nsdate(start_date) - ns_end_date = datetime_to_nsdate(end_date) - - # Retrieve all calendars - all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent - if calendar_ids: - selected_calendars = [cal for cal in all_calendars if cal.calendarIdentifier() in calendar_ids] - else: - selected_calendars = all_calendars - - # Filtering events by selected calendars - predicate = event_store.predicateForEventsWithStartDate_endDate_calendars_(ns_start_date, ns_end_date, selected_calendars) - events = event_store.eventsMatchingPredicate_(predicate) - - event_list = [] - for event in events: - # Check if event.attendees() returns None - if event.attendees(): - attendees = [{'name': att.name(), 'email': att.emailAddress()} for att in event.attendees() if att.emailAddress()] - else: - attendees = [] - - # Format the start and end dates properly - start_date_str = event.startDate().descriptionWithLocale_(None) - end_date_str = event.endDate().descriptionWithLocale_(None) - - event_data = { - "subject": event.title(), - "id": event.eventIdentifier(), - "start": start_date_str, - "end": end_date_str, - "bodyPreview": event.notes() if event.notes() else '', - "attendees": attendees, - "location": event.location() if event.location() else '', - "onlineMeetingUrl": '', # Defaulting to empty as macOS EventKit does not provide this - "showAs": 'busy', # Default to 'busy' - "isAllDay": event.isAllDay() - } - - event_list.append(event_data) - - return event_list - async def get_ms365_events(start_date: datetime, end_date: datetime): token = await load_token() if token: @@ -331,7 +334,7 @@ async def get_ms365_events(start_date: datetime, end_date: datetime): response = await client.get(graph_url, headers=headers) if response.status_code != 200: - logger.error("Failed to retrieve events from Microsoft 365") + err("Failed to retrieve events from Microsoft 365") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to retrieve events", @@ -342,48 +345,48 @@ async def get_ms365_events(start_date: datetime, end_date: datetime): async def parse_calendar_for_day(range_start: datetime, range_end: datetime, events: List[Dict[str, Any]]): - range_start = await loc.dt(range_start) - range_end = await loc.dt(range_end) + range_start = await gis.dt(range_start) + range_end = await gis.dt(range_end) event_list = [] for event in events: - logger.info(f"Event: {event}") + info(f"Event: {event}") start_str = event.get('start') end_str = event.get('end') if isinstance(start_str, dict): start_str = start_str.get('dateTime') else: - logger.info(f"Start date string not a dict") + info(f"Start date string not a dict") if isinstance(end_str, dict): end_str = end_str.get('dateTime') else: - logger.info(f"End date string not a dict") + info(f"End date string not a dict") try: - start_date = await loc.dt(start_str) if start_str else None + start_date = await gis.dt(start_str) if start_str else None except (ValueError, TypeError) as e: - logger.error(f"Invalid start date format: {start_str}, error: {e}") + err(f"Invalid start date format: {start_str}, error: {e}") continue try: - end_date = await loc.dt(end_str) if end_str else None + end_date = await gis.dt(end_str) if end_str else None except (ValueError, TypeError) as e: - logger.error(f"Invalid end date format: {end_str}, error: {e}") + err(f"Invalid end date format: {end_str}, error: {e}") continue - logger.debug(f"Comparing {start_date} with range {range_start} to {range_end}") + debug(f"Comparing {start_date} with range {range_start} to {range_end}") if start_date: # Ensure start_date is timezone-aware - start_date = await loc.dt(start_date) + start_date = await gis.dt(start_date) # If end_date is not provided, assume it's the same as start_date if not end_date: end_date = start_date else: - end_date = await loc.dt(end_date) + end_date = await gis.dt(end_date) # Check if the event overlaps with the given range if (start_date < range_end) and (end_date > range_start): @@ -405,11 +408,11 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve "busy": event.get('showAs', '') in ['busy', 'tentative'], "all_day": event.get('isAllDay', False) } - logger.info(f"Event_data: {event_data}") + info(f"Event_data: {event_data}") event_list.append(event_data) else: - logger.debug(f"Event outside of specified range: {start_date} to {end_date}") + debug(f"Event outside of specified range: {start_date} to {end_date}") else: - logger.error(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}") + err(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}") return event_list \ No newline at end of file diff --git a/sijapi/routers/cf.py b/sijapi/routers/cf.py index 6cd40bf..b23fa25 100644 --- a/sijapi/routers/cf.py +++ b/sijapi/routers/cf.py @@ -13,6 +13,11 @@ import os cf = APIRouter() logger = L.get_module_logger("cal") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) class DNSRecordRequest(BaseModel): full_domain: str @@ -70,7 +75,7 @@ async def retry_request(url, headers, max_retries=5, backoff_factor=1): response.raise_for_status() return response except (httpx.HTTPError, httpx.ConnectTimeout) as e: - logger.error(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...") + err(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...") await sleep(backoff_factor * (2 ** retry)) raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request") diff --git a/sijapi/routers/dist.py b/sijapi/routers/dist.py index 0f1b7f4..05f09cb 100644 --- a/sijapi/routers/dist.py +++ b/sijapi/routers/dist.py @@ -9,6 +9,11 @@ from sijapi import L, REBOOT_SCRIPT_PATH, HOST_CONFIG, API_CONFIG dist = APIRouter() logger = L.get_module_logger("dist") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) @dist.get("/update-restart-others") async def update_and_restart_others(): @@ -32,10 +37,10 @@ async def update_and_restart_self(safe: bool = True): stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await process.communicate() - logger.info(f"Update and restart initiated for self. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}") + info(f"Update and restart initiated for self. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}") return {"message": "Update and restart process initiated for this server."} except Exception as e: - logger.error(f"Failed to initiate update and restart for self: {str(e)}") + err(f"Failed to initiate update and restart for self: {str(e)}") raise HTTPException(status_code=500, detail=f"Failed to initiate update and restart: {str(e)}") @dist.get("/update-and-restart-all") @@ -56,5 +61,5 @@ async def ensure_redundancy(): redundancy = True break except aiohttp.ClientError: - logger.warning(f"Failed to check health of server {server.id}") + warn(f"Failed to check health of server {server.id}") return redundancy diff --git a/sijapi/routers/email.py b/sijapi/routers/email.py index 19e7fd5..1e7f9d3 100644 --- a/sijapi/routers/email.py +++ b/sijapi/routers/email.py @@ -21,25 +21,24 @@ import yaml from typing import List, Dict, Optional, Set from datetime import datetime as dt_datetime from sijapi import L, PODCAST_DIR, DEFAULT_VOICE, EMAIL_CONFIG, EMAIL_LOGS -from sijapi.routers import img, loc, tts, llm +from sijapi.routers import gis, img, tts, llm from sijapi.utilities import clean_text, assemble_journal_path, extract_text, prefix_lines from sijapi.classes import EmailAccount, IMAPConfig, SMTPConfig, IncomingEmail, EmailContact, AutoResponder email = APIRouter() logger = L.get_module_logger("email") -print(f"Email logger level: {logger.level}") # Debug print - -logger.debug("This is a debug message from email module") -logger.info("This is an info message from email module") - +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) def load_email_accounts(yaml_path: str) -> List[EmailAccount]: with open(yaml_path, 'r') as file: config = yaml.safe_load(file) return [EmailAccount(**account) for account in config['accounts']] - def get_imap_connection(account: EmailAccount): return Imbox(account.imap.host, username=account.imap.username, @@ -59,34 +58,34 @@ def get_smtp_connection(autoresponder: AutoResponder): if smtp_config.encryption == 'SSL': try: - logger.debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}") + debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}") return SMTP_SSL(smtp_config.host, smtp_config.port, context=context) except ssl.SSLError as e: - logger.error(f"SSL connection failed: {str(e)}") + err(f"SSL connection failed: {str(e)}") # If SSL fails, try TLS try: - logger.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}") + debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}") smtp = SMTP(smtp_config.host, smtp_config.port) smtp.starttls(context=context) return smtp except Exception as e: - logger.error(f"STARTTLS connection failed: {str(e)}") + err(f"STARTTLS connection failed: {str(e)}") raise elif smtp_config.encryption == 'STARTTLS': try: - logger.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}") + debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}") smtp = SMTP(smtp_config.host, smtp_config.port) smtp.starttls(context=context) return smtp except Exception as e: - logger.error(f"STARTTLS connection failed: {str(e)}") + err(f"STARTTLS connection failed: {str(e)}") raise else: try: - logger.debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}") + debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}") return SMTP(smtp_config.host, smtp_config.port) except Exception as e: - logger.error(f"Unencrypted connection failed: {str(e)}") + err(f"Unencrypted connection failed: {str(e)}") raise async def send_response(to_email: str, subject: str, body: str, profile: AutoResponder, image_attachment: Path = None) -> bool: @@ -103,20 +102,20 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes img = MIMEImage(img_file.read(), name=os.path.basename(image_attachment)) message.attach(img) - logger.debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...") + debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...") server = get_smtp_connection(profile) - logger.debug(f"SMTP connection established: {type(server)}") + debug(f"SMTP connection established: {type(server)}") server.login(profile.smtp.username, profile.smtp.password) server.send_message(message) - logger.info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!") + info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!") return True except Exception as e: - logger.error(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}") - logger.error(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}") - logger.error(traceback.format_exc()) + err(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}") + err(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}") + err(traceback.format_exc()) return False finally: @@ -124,7 +123,7 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes try: server.quit() except Exception as e: - logger.error(f"Error closing SMTP connection: {str(e)}") + err(f"Error closing SMTP connection: {str(e)}") @@ -161,15 +160,15 @@ async def process_account_archival(account: EmailAccount): while True: try: processed_uids = await load_processed_uids(summarized_log) - logger.debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.") + debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.") with get_imap_connection(account) as inbox: unread_messages = inbox.messages(unread=True) - logger.debug(f"There are {len(unread_messages)} unread messages.") + debug(f"There are {len(unread_messages)} unread messages.") for uid, message in unread_messages: uid_str = uid.decode() if isinstance(uid, bytes) else str(uid) if uid_str not in processed_uids: recipients = [EmailContact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to] - localized_datetime = await loc.dt(message.date) + localized_datetime = await gis.dt(message.date) this_email = IncomingEmail( sender=message.sent_from[0]['email'], datetime_received=localized_datetime, @@ -184,13 +183,13 @@ async def process_account_archival(account: EmailAccount): save_success = await save_email(md_path, md_content) if save_success: await save_processed_uid(summarized_log, account.name, uid_str) - logger.info(f"Summarized email: {uid_str}") + info(f"Summarized email: {uid_str}") else: - logger.warning(f"Failed to summarize {this_email.subject}") + warn(f"Failed to summarize {this_email.subject}") else: - logger.debug(f"Skipping {uid_str} because it was already processed.") + debug(f"Skipping {uid_str} because it was already processed.") except Exception as e: - logger.error(f"An error occurred during summarization for account {account.name}: {e}") + err(f"An error occurred during summarization for account {account.name}: {e}") await asyncio.sleep(account.refresh) @@ -236,7 +235,7 @@ tags: return markdown_content except Exception as e: - logger.error(f"Exception: {e}") + err(f"Exception: {e}") return False async def save_email(md_path, md_content): @@ -244,14 +243,14 @@ async def save_email(md_path, md_content): with open(md_path, 'w', encoding='utf-8') as md_file: md_file.write(md_content) - logger.debug(f"Saved markdown to {md_path}") + debug(f"Saved markdown to {md_path}") return True except Exception as e: - logger.error(f"Failed to save email: {e}") + err(f"Failed to save email: {e}") return False def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount) -> List[AutoResponder]: - logger.debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"") + debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"") def matches_list(item: str, this_email: IncomingEmail) -> bool: if '@' in item: return item in this_email.sender @@ -262,12 +261,12 @@ def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount whitelist_match = not profile.whitelist or any(matches_list(item, this_email) for item in profile.whitelist) blacklist_match = any(matches_list(item, this_email) for item in profile.blacklist) if whitelist_match and not blacklist_match: - logger.debug(f"We have a match for {whitelist_match} and no blacklist matches.") + debug(f"We have a match for {whitelist_match} and no blacklist matches.") matching_profiles.append(profile) elif whitelist_match and blacklist_match: - logger.debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}") + debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}") else: - logger.debug(f"No whitelist or blacklist matches.") + debug(f"No whitelist or blacklist matches.") return matching_profiles @@ -278,30 +277,30 @@ async def process_account_autoresponding(account: EmailAccount): while True: try: processed_uids = await load_processed_uids(EMAIL_AUTORESPONSE_LOG) - logger.debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.") + debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.") with get_imap_connection(account) as inbox: unread_messages = inbox.messages(unread=True) - logger.debug(f"There are {len(unread_messages)} unread messages.") + debug(f"There are {len(unread_messages)} unread messages.") for uid, message in unread_messages: uid_str = uid.decode() if isinstance(uid, bytes) else str(uid) if uid_str not in processed_uids: await autorespond_single_email(message, uid_str, account, EMAIL_AUTORESPONSE_LOG) else: - logger.debug(f"Skipping {uid_str} because it was already processed.") + debug(f"Skipping {uid_str} because it was already processed.") except Exception as e: - logger.error(f"An error occurred during auto-responding for account {account.name}: {e}") + err(f"An error occurred during auto-responding for account {account.name}: {e}") await asyncio.sleep(account.refresh) async def autorespond_single_email(message, uid_str: str, account: EmailAccount, log_file: Path): this_email = await create_incoming_email(message) - logger.debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...") + debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...") matching_profiles = get_matching_autoresponders(this_email, account) - logger.debug(f"Matching profiles: {matching_profiles}") + debug(f"Matching profiles: {matching_profiles}") for profile in matching_profiles: response_body = await generate_response(this_email, profile, account) @@ -311,18 +310,18 @@ async def autorespond_single_email(message, uid_str: str, account: EmailAccount, jpg_path = await img.workflow(profile.image_prompt, earlyout=False, downscale_to_fit=True) if profile.image_prompt else None success = await send_response(this_email.sender, subject, response_body, profile, jpg_path) if success: - logger.warning(f"Auto-responded to email: {this_email.subject}") + warn(f"Auto-responded to email: {this_email.subject}") await save_processed_uid(log_file, account.name, uid_str) else: - logger.warning(f"Failed to send auto-response to {this_email.subject}") + warn(f"Failed to send auto-response to {this_email.subject}") else: - logger.warning(f"Unable to generate auto-response for {this_email.subject}") + warn(f"Unable to generate auto-response for {this_email.subject}") async def generate_response(this_email: IncomingEmail, profile: AutoResponder, account: EmailAccount) -> Optional[str]: - logger.info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}") + info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}") - now = await loc.dt(dt_datetime.now()) - then = await loc.dt(this_email.datetime_received) + now = await gis.dt(dt_datetime.now()) + then = await gis.dt(this_email.datetime_received) age = now - then usr_prompt = f''' Generate a personalized auto-response to the following email: @@ -337,7 +336,7 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec try: response = await llm.query_ollama(usr_prompt, sys_prompt, profile.ollama_model, 400) - logger.debug(f"query_ollama response: {response}") + debug(f"query_ollama response: {response}") if isinstance(response, dict) and "message" in response and "content" in response["message"]: response = response["message"]["content"] @@ -345,14 +344,14 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec return response + "\n\n" except Exception as e: - logger.error(f"Error generating auto-response: {str(e)}") + err(f"Error generating auto-response: {str(e)}") return None async def create_incoming_email(message) -> IncomingEmail: recipients = [EmailContact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to] - localized_datetime = await loc.dt(message.date) + localized_datetime = await gis.dt(message.date) return IncomingEmail( sender=message.sent_from[0]['email'], datetime_received=localized_datetime, @@ -372,7 +371,6 @@ async def save_processed_uid(filename: Path, account_name: str, uid: str): async with aiofiles.open(filename, 'a') as f: await f.write(f"{account_name}:{uid}\n") - async def process_all_accounts(): email_accounts = load_email_accounts(EMAIL_CONFIG) summarization_tasks = [asyncio.create_task(process_account_archival(account)) for account in email_accounts] diff --git a/sijapi/routers/gis.py b/sijapi/routers/gis.py new file mode 100644 index 0000000..8d8b937 --- /dev/null +++ b/sijapi/routers/gis.py @@ -0,0 +1,568 @@ +''' +Uses Postgres/PostGIS for location tracking (data obtained via the companion mobile Pythonista scripts), and for geocoding purposes. +''' +from fastapi import APIRouter, HTTPException, Query +from fastapi.responses import HTMLResponse, JSONResponse +import random +from pathlib import Path +import traceback +from datetime import datetime, timezone +from typing import Union, List +import folium +from folium.plugins import HeatMap, MarkerCluster, Search +from folium.plugins import Fullscreen, MiniMap, MousePosition, Geocoder, Draw, MeasureControl +from zoneinfo import ZoneInfo +from dateutil.parser import parse as dateutil_parse +from typing import Optional, List, Union +from sijapi import L, DB, TZ, GEO +from sijapi.classes import Location +from sijapi.utilities import haversine, assemble_journal_path + +gis = APIRouter() +logger = L.get_module_logger("gis") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) + +async def dt( + date_time: Union[str, int, datetime], + tz: Union[str, ZoneInfo, None] = None +) -> datetime: + try: + # Convert integer (epoch time) to UTC datetime + if isinstance(date_time, int): + date_time = datetime.fromtimestamp(date_time, tz=timezone.utc) + debug(f"Converted epoch time {date_time} to UTC datetime object.") + + # Convert string to datetime if necessary + elif isinstance(date_time, str): + date_time = dateutil_parse(date_time) + debug(f"Converted string '{date_time}' to datetime object.") + + if not isinstance(date_time, datetime): + raise ValueError(f"Input must be a string, integer (epoch time), or datetime object. What we received: {date_time}, type {type(date_time)}") + + # Ensure the datetime is timezone-aware (UTC if not specified) + if date_time.tzinfo is None: + date_time = date_time.replace(tzinfo=timezone.utc) + debug("Added UTC timezone to naive datetime.") + + # Handle provided timezone + if tz is not None: + if isinstance(tz, str): + if tz == "local": + last_loc = await get_timezone_without_timezone(date_time) + tz = await GEO.tz_at(last_loc.latitude, last_loc.longitude) + debug(f"Using local timezone: {tz}") + else: + try: + tz = ZoneInfo(tz) + except Exception as e: + err(f"Invalid timezone string '{tz}'. Error: {e}") + raise ValueError(f"Invalid timezone string: {tz}") + elif isinstance(tz, ZoneInfo): + pass # tz is already a ZoneInfo object + else: + raise ValueError(f"What we needed: tz == 'local', a string, or a ZoneInfo object. What we got: tz, a {type(tz)}, == {tz})") + + # Convert to the provided or determined timezone + date_time = date_time.astimezone(tz) + debug(f"Converted datetime to timezone: {tz}") + + return date_time + except ValueError as e: + err(f"Error in dt: {e}") + raise + except Exception as e: + err(f"Unexpected error in dt: {e}") + raise ValueError(f"Failed to process datetime: {e}") + + +async def get_timezone_without_timezone(date_time): + # This is a bit convoluted because we're trying to solve the paradox of needing to + # know the location in order to determine the timezone, but needing the timezone to be + # certain we've chosen the correct location for a provided timezone-naive datetime + # (relevant, e.g., if this datetime coincided with inter-timezone travel). + # Our imperfect solution is to use UTC for an initial location query to determine + # roughly where we were at the time, get that timezone, then check the location again + # applying that timezone to the provided datetime. If the location changed between the + # datetime in UTC and the localized datetime, we'll use the new location's timezone; + # otherwise we'll use the timezone we sourced from the UTC timezone query. But at the + # end of the day it's entirely possible to spend the end of the day twice in two different + # timezones (or none!), so this is a best-effort solution. + + # Step 1: Use UTC as an interim timezone to query location + interim_dt = date_time.replace(tzinfo=ZoneInfo("UTC")) + interim_loc = await fetch_last_location_before(interim_dt) + + # Step 2: Get a preliminary timezone based on the interim location + interim_tz = await GEO.tz_current((interim_loc.latitude, interim_loc.longitude)) + + # Step 3: Apply this preliminary timezone and query location again + query_dt = date_time.replace(tzinfo=ZoneInfo(interim_tz)) + query_loc = await fetch_last_location_before(query_dt) + + # Step 4: Get the final timezone, reusing interim_tz if location hasn't changed + return interim_tz if query_loc == interim_loc else await GEO.tz_current(query_loc.latitude, query_loc.longitude) + + +async def get_last_location() -> Optional[Location]: + query_datetime = datetime.now(TZ) + debug(f"Query_datetime: {query_datetime}") + + this_location = await fetch_last_location_before(query_datetime) + + if this_location: + debug(f"location: {this_location}") + return this_location + + return None + + +async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int, datetime, None] = None) -> List[Location]: + start_datetime = await dt(start) + if end is None: + end_datetime = await dt(start_datetime.replace(hour=23, minute=59, second=59)) + else: + end_datetime = await dt(end) if not isinstance(end, datetime) else end + + if start_datetime.time() == datetime.min.time() and end_datetime.time() == datetime.min.time(): + end_datetime = await dt(end_datetime.replace(hour=23, minute=59, second=59)) + + debug(f"Fetching locations between {start_datetime} and {end_datetime}") + + async with DB.get_connection() as conn: + locations = [] + # Check for records within the specified datetime range + range_locations = await conn.fetch(''' + SELECT id, datetime, + ST_X(ST_AsText(location)::geometry) AS longitude, + ST_Y(ST_AsText(location)::geometry) AS latitude, + ST_Z(ST_AsText(location)::geometry) AS elevation, + city, state, zip, street, + action, device_type, device_model, device_name, device_os + FROM locations + WHERE datetime >= $1 AND datetime <= $2 + ORDER BY datetime DESC + ''', start_datetime.replace(tzinfo=None), end_datetime.replace(tzinfo=None)) + + debug(f"Range locations query returned: {range_locations}") + locations.extend(range_locations) + + if not locations and (end is None or start_datetime.date() == end_datetime.date()): + location_data = await conn.fetchrow(''' + SELECT id, datetime, + ST_X(ST_AsText(location)::geometry) AS longitude, + ST_Y(ST_AsText(location)::geometry) AS latitude, + ST_Z(ST_AsText(location)::geometry) AS elevation, + city, state, zip, street, + action, device_type, device_model, device_name, device_os + FROM locations + WHERE datetime < $1 + ORDER BY datetime DESC + LIMIT 1 + ''', start_datetime.replace(tzinfo=None)) + + debug(f"Fallback query returned: {location_data}") + if location_data: + locations.append(location_data) + + debug(f"Locations found: {locations}") + + # Sort location_data based on the datetime field in descending order + sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True) + + # Create Location objects directly from the location data + location_objects = [ + Location( + latitude=location['latitude'], + longitude=location['longitude'], + datetime=location['datetime'], + elevation=location.get('elevation'), + city=location.get('city'), + state=location.get('state'), + zip=location.get('zip'), + street=location.get('street'), + context={ + 'action': location.get('action'), + 'device_type': location.get('device_type'), + 'device_model': location.get('device_model'), + 'device_name': location.get('device_name'), + 'device_os': location.get('device_os') + } + ) for location in sorted_locations if location['latitude'] is not None and location['longitude'] is not None + ] + + return location_objects if location_objects else [] + +# Function to fetch the last location before the specified datetime +async def fetch_last_location_before(datetime: datetime) -> Optional[Location]: + datetime = await dt(datetime) + + debug(f"Fetching last location before {datetime}") + + async with DB.get_connection() as conn: + + location_data = await conn.fetchrow(''' + SELECT id, datetime, + ST_X(ST_AsText(location)::geometry) AS longitude, + ST_Y(ST_AsText(location)::geometry) AS latitude, + ST_Z(ST_AsText(location)::geometry) AS elevation, + city, state, zip, street, country, + action + FROM locations + WHERE datetime < $1 + ORDER BY datetime DESC + LIMIT 1 + ''', datetime.replace(tzinfo=None)) + + await conn.close() + + if location_data: + debug(f"Last location found: {location_data}") + return Location(**location_data) + else: + debug("No location found before the specified datetime") + return None + +@gis.get("/map", response_class=HTMLResponse) +async def generate_map_endpoint( + start_date: Optional[str] = Query(None), + end_date: Optional[str] = Query(None), + max_points: int = Query(32767, description="Maximum number of points to display") +): + try: + if start_date and end_date: + start_date = await dt(start_date) + end_date = await dt(end_date) + else: + start_date, end_date = await get_date_range() + except ValueError: + raise HTTPException(status_code=400, detail="Invalid date format") + + info(f"Generating map for {start_date} to {end_date}") + html_content = await generate_map(start_date, end_date, max_points) + return HTMLResponse(content=html_content) + +async def get_date_range(): + async with DB.get_connection() as conn: + query = "SELECT MIN(datetime) as min_date, MAX(datetime) as max_date FROM locations" + row = await conn.fetchrow(query) + if row and row['min_date'] and row['max_date']: + return row['min_date'], row['max_date'] + else: + return datetime(2022, 1, 1), datetime.now() + + + + +async def generate_and_save_heatmap( + start_date: Union[str, int, datetime], + end_date: Optional[Union[str, int, datetime]] = None, + output_path: Optional[Path] = None +) -> Path: + """ +Generate a heatmap for the given date range and save it as a PNG file using Folium. + +:param start_date: The start date for the map (or the only date if end_date is not provided) +:param end_date: The end date for the map (optional) +:param output_path: The path to save the PNG file (optional) +:return: The path where the PNG file was saved + """ + try: + start_date = await dt(start_date) + if end_date: + end_date = await dt(end_date) + else: + end_date = start_date.replace(hour=23, minute=59, second=59) + + # Fetch locations + locations = await fetch_locations(start_date, end_date) + if not locations: + raise ValueError("No locations found for the given date range") + + # Create map + m = folium.Map() + + # Prepare heatmap data + heat_data = [[loc.latitude, loc.longitude] for loc in locations] + + # Add heatmap layer + HeatMap(heat_data).add_to(m) + + # Fit the map to the bounds of all locations + bounds = [ + [min(loc.latitude for loc in locations), min(loc.longitude for loc in locations)], + [max(loc.latitude for loc in locations), max(loc.longitude for loc in locations)] + ] + m.fit_bounds(bounds) + + # Generate output path if not provided + if output_path is None: + output_path, relative_path = assemble_journal_path(end_date, filename="map", extension=".png", no_timestamp=True) + + # Save the map as PNG + m.save(str(output_path)) + + info(f"Heatmap saved as PNG: {output_path}") + return output_path + + except Exception as e: + err(f"Error generating and saving heatmap: {str(e)}") + raise + + + +async def generate_map(start_date: datetime, end_date: datetime, max_points: int): + locations = await fetch_locations(start_date, end_date) + if not locations: + raise HTTPException(status_code=404, detail="No locations found for the given date range") + + info(f"Found {len(locations)} locations for the given date range") + + if len(locations) > max_points: + locations = random.sample(locations, max_points) + + map_center = [sum(loc.latitude for loc in locations) / len(locations), + sum(loc.longitude for loc in locations) / len(locations)] + m = folium.Map(location=map_center, zoom_start=5) + + folium.TileLayer('openstreetmap', name='OpenStreetMap').add_to(m) + folium.TileLayer( + tiles='https://basemap.nationalmap.gov/arcgis/rest/services/USGSTopo/MapServer/tile/{z}/{y}/{x}', + attr='USGS The National Map', + name='USGS Topo' + ).add_to(m) + folium.TileLayer( + tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}', + attr='Esri', + name='Esri World Topo' + ).add_to(m) + + folium.TileLayer('cartodbdark_matter', name='Dark Mode').add_to(m) + + + # In the generate_map function: + draw = Draw( + draw_options={ + 'polygon': True, + 'rectangle': True, + 'circle': True, + 'marker': True, + 'circlemarker': False, + }, + edit_options={'edit': False} + ) + draw.add_to(m) + + MeasureControl( + position='topright', + primary_length_unit='kilometers', + secondary_length_unit='miles', + primary_area_unit='sqmeters', + secondary_area_unit='acres' + ).add_to(m) + + m.get_root().html.add_child(folium.Element(""" + + """)) + + # Add marker cluster + marker_cluster = MarkerCluster(name="Markers").add_to(m) + + # Prepare data for heatmap + heat_data = [[loc.latitude, loc.longitude] for loc in locations] + + # Add heatmap + HeatMap(heat_data, name="Heatmap").add_to(m) + + # Add markers to cluster + for location in locations: + popup_content = f""" + {location.city}, {location.state}
+ Elevation: {location.elevation}m
+ Date: {location.datetime}
+ Action: {location.context.get('action', 'N/A')}
+ Device: {location.context.get('device_name', 'N/A')} ({location.context.get('device_model', 'N/A')}) + """ + folium.Marker( + location=[location.latitude, location.longitude], + popup=popup_content, + tooltip=f"{location.city}, {location.state}" + ).add_to(marker_cluster) + + # Add controls + Fullscreen().add_to(m) + MiniMap().add_to(m) + MousePosition().add_to(m) + Geocoder().add_to(m) + Draw().add_to(m) + + # Add search functionality + Search( + layer=marker_cluster, + geom_type='Point', + placeholder='Search for a location', + collapsed=False, + search_label='city' + ).add_to(m) + + # Add layer control + folium.LayerControl().add_to(m) + + return m.get_root().render() + +async def post_location(location: Location): + # if not location.datetime: + # info(f"location appears to be missing datetime: {location}") + # else: + # debug(f"post_location called with {location.datetime}") + async with DB.get_connection() as conn: + try: + context = location.context or {} + action = context.get('action', 'manual') + device_type = context.get('device_type', 'Unknown') + device_model = context.get('device_model', 'Unknown') + device_name = context.get('device_name', 'Unknown') + device_os = context.get('device_os', 'Unknown') + + # Parse and localize the datetime + localized_datetime = await dt(location.datetime) + + await conn.execute(''' + INSERT INTO locations ( + datetime, location, city, state, zip, street, action, device_type, device_model, device_name, device_os, + class_, type, name, display_name, amenity, house_number, road, quarter, neighbourhood, + suburb, county, country_code, country + ) + VALUES ($1, ST_SetSRID(ST_MakePoint($2, $3, $4), 4326), $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, + $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26) + ''', localized_datetime, location.longitude, location.latitude, location.elevation, location.city, location.state, + location.zip, location.street, action, device_type, device_model, device_name, device_os, + location.class_, location.type, location.name, location.display_name, + location.amenity, location.house_number, location.road, location.quarter, location.neighbourhood, + location.suburb, location.county, location.country_code, location.country) + + await conn.close() + info(f"Successfully posted location: {location.latitude}, {location.longitude}, {location.elevation} on {localized_datetime}") + return { + 'datetime': localized_datetime, + 'latitude': location.latitude, + 'longitude': location.longitude, + 'elevation': location.elevation, + 'city': location.city, + 'state': location.state, + 'zip': location.zip, + 'street': location.street, + 'action': action, + 'device_type': device_type, + 'device_model': device_model, + 'device_name': device_name, + 'device_os': device_os, + 'class_': location.class_, + 'type': location.type, + 'name': location.name, + 'display_name': location.display_name, + 'amenity': location.amenity, + 'house_number': location.house_number, + 'road': location.road, + 'quarter': location.quarter, + 'neighbourhood': location.neighbourhood, + 'suburb': location.suburb, + 'county': location.county, + 'country_code': location.country_code, + 'country': location.country + } + except Exception as e: + err(f"Error posting location {e}") + err(traceback.format_exc()) + return None + + +@gis.post("/locate") +async def post_locate_endpoint(locations: Union[Location, List[Location]]): + if isinstance(locations, Location): + locations = [locations] + + # Prepare locations + for lcn in locations: + if not lcn.datetime: + tz = await GEO.tz_at(lcn.latitude, lcn.longitude) + lcn.datetime = datetime.now(ZoneInfo(tz)).isoformat() + + if not lcn.context: + lcn.context = { + "action": "missing", + "device_type": "API", + "device_model": "Unknown", + "device_name": "Unknown", + "device_os": "Unknown" + } + debug(f"Location received for processing: {lcn}") + + geocoded_locations = await GEO.code(locations) + + responses = [] + if isinstance(geocoded_locations, List): + for location in geocoded_locations: + debug(f"Final location to be submitted to database: {location}") + location_entry = await post_location(location) + if location_entry: + responses.append({"location_data": location_entry}) + else: + warn(f"Posting location to database appears to have failed.") + else: + debug(f"Final location to be submitted to database: {geocoded_locations}") + location_entry = await post_location(geocoded_locations) + if location_entry: + responses.append({"location_data": location_entry}) + else: + warn(f"Posting location to database appears to have failed.") + + return {"message": "Locations and weather updated", "results": responses} + + +@gis.get("/locate", response_model=Location) +async def get_last_location_endpoint() -> JSONResponse: + this_location = await get_last_location() + + if this_location: + location_dict = this_location.model_dump() + location_dict["datetime"] = this_location.datetime.isoformat() + return JSONResponse(content=location_dict) + else: + raise HTTPException(status_code=404, detail="No location found before the specified datetime") + +@gis.get("/locate/{datetime_str}", response_model=List[Location]) +async def get_locate(datetime_str: str, all: bool = False): + try: + date_time = await dt(datetime_str) + except ValueError as e: + err(f"Invalid datetime string provided: {datetime_str}") + return ["ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."] + + locations = await fetch_locations(date_time) + if not locations: + raise HTTPException(status_code=404, detail="No nearby data found for this date and time") + + return locations if all else [locations[0]] \ No newline at end of file diff --git a/sijapi/routers/health.py b/sijapi/routers/health.py index ebca96f..41baa5e 100644 --- a/sijapi/routers/health.py +++ b/sijapi/routers/health.py @@ -12,6 +12,11 @@ from sijapi import L, API, TS_ID, SUBNET_BROADCAST health = APIRouter(tags=["public", "trusted", "private"]) logger = L.get_module_logger("health") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) @health.get("/health") def get_health(): @@ -49,7 +54,7 @@ async def get_wan_ip(): wan_info = response.json() return wan_info.get('ip', 'Unavailable') except Exception as e: - logger.error(f"Error fetching WAN IP: {e}") + err(f"Error fetching WAN IP: {e}") return "Unavailable" @health.get("/ts_ip") diff --git a/sijapi/routers/ig.py b/sijapi/routers/ig.py index 818e05c..47825b5 100644 --- a/sijapi/routers/ig.py +++ b/sijapi/routers/ig.py @@ -42,6 +42,11 @@ import base64 ig = APIRouter() logger = L.get_module_logger("ig") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) class IG_Request(BaseModel): file: Optional[UploadFile] = None # upload a particular file to Instagram diff --git a/sijapi/routers/img.py b/sijapi/routers/img.py index 06ff3ae..faadd1a 100644 --- a/sijapi/routers/img.py +++ b/sijapi/routers/img.py @@ -34,6 +34,12 @@ from sijapi import API, L, COMFYUI_URL, COMFYUI_OUTPUT_DIR, IMG_CONFIG_PATH, IMG img = APIRouter() logger = L.get_module_logger("img") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) + CLIENT_ID = str(uuid.uuid4()) @img.post("/img") @@ -79,12 +85,12 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s scene_workflow = random.choice(scene_data['workflows']) if size: - logger.debug(f"Specified size: {size}") + debug(f"Specified size: {size}") size = size if size else scene_workflow.get('size', '1024x1024') width, height = map(int, size.split('x')) - logger.debug(f"Parsed width: {width}; parsed height: {height}") + debug(f"Parsed width: {width}; parsed height: {height}") workflow_path = Path(IMG_WORKFLOWS_DIR) / scene_workflow['workflow'] workflow_data = json.loads(workflow_path.read_text()) @@ -98,22 +104,22 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s } saved_file_key = await update_prompt_and_get_key(workflow=workflow_data, post=post, positive=image_concept) - print(f"Saved file key: {saved_file_key}") + info(f"Saved file key: {saved_file_key}") prompt_id = await queue_prompt(workflow_data) - print(f"Prompt ID: {prompt_id}") + info(f"Prompt ID: {prompt_id}") max_size = max(width, height) if downscale_to_fit else None destination_path = Path(destination_path).with_suffix(".jpg") if destination_path else IMG_DIR / f"{prompt_id}.jpg" if earlyout: asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)) - logger.debug(f"Returning {destination_path}") + debug(f"Returning {destination_path}") return destination_path else: await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path) - logger.debug(f"Returning {destination_path}") + debug(f"Returning {destination_path}") return destination_path @@ -124,10 +130,10 @@ async def generate_and_save_image(prompt_id, saved_file_key, max_size, destinati jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path) if Path(jpg_file_path) != Path(destination_path): - logger.error(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}") + err(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}") except Exception as e: - print(f"Error in generate_and_save_image: {e}") + err(f"Error in generate_and_save_image: {e}") return None @@ -149,7 +155,7 @@ async def poll_status(prompt_id): status_data = await response.json() job_data = status_data.get(prompt_id, {}) if job_data.get("status", {}).get("completed", False): - print(f"{prompt_id} completed in {elapsed_time} seconds.") + info(f"{prompt_id} completed in {elapsed_time} seconds.") return job_data await asyncio.sleep(1) @@ -200,7 +206,7 @@ async def save_as_jpg(image_data, prompt_id, max_size = None, quality = 100, des return str(destination_path_jpg) except Exception as e: - print(f"Error processing image: {e}") + err(f"Error processing image: {e}") return None @@ -216,11 +222,11 @@ def set_presets(workflow_data, preset_values): if 'inputs' in workflow_data.get(preset_node, {}): workflow_data[preset_node]['inputs'][preset_key] = preset_value else: - logger.debug("Node not found in workflow_data") + debug("Node not found in workflow_data") else: - logger.debug("Required data missing in preset_values") + debug("Required data missing in preset_values") else: - logger.debug("No preset_values found") + debug("No preset_values found") def get_return_path(destination_path): @@ -235,7 +241,7 @@ def get_scene(scene): IMG_CONFIG = yaml.safe_load(IMG_CONFIG_file) for scene_data in IMG_CONFIG['scenes']: if scene_data['scene'] == scene: - logger.debug(f"Found scene for \"{scene}\".") + debug(f"Found scene for \"{scene}\".") return scene_data return None @@ -254,11 +260,11 @@ def get_matching_scene(prompt): max_count = count scene_data = sc if scene_data: - logger.debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!") + debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!") if scene_data: return scene_data else: - logger.debug(f"No matching scenes found, falling back to default scene.") + debug(f"No matching scenes found, falling back to default scene.") return IMG_CONFIG['scenes'][0] @@ -282,11 +288,11 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0): for attempt in range(retries): try: with socket.create_connection(("127.0.0.1", 8188), timeout=2): - print("ComfyUI is already running.") + info("ComfyUI is already running.") return except (socket.timeout, ConnectionRefusedError): if attempt == 0: # Only try to start ComfyUI on the first failed attempt - print("ComfyUI is not running. Starting it now...") + warn("ComfyUI is not running. Starting it now...") try: tmux_command = ( "tmux split-window -h " @@ -295,13 +301,14 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0): "python main.py; exec $SHELL\"" ) subprocess.Popen(tmux_command, shell=True) - print("ComfyUI started in a new tmux session.") + info("ComfyUI started in a new tmux session.") except Exception as e: raise RuntimeError(f"Error starting ComfyUI: {e}") - print(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...") + warn(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...") await asyncio.sleep(timeout) + crit(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.") raise RuntimeError(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.") # async def upload_and_get_shareable_link(image_path): @@ -326,10 +333,10 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0): # shareable_link = f"https://{PHOTOPRISM_URL}/p/{photo_uuid}" # return shareable_link # else: - # logger.error("Could not find the uploaded photo details.") + # err("Could not find the uploaded photo details.") # return None # except Exception as e: - # logger.error(f"Error in upload_and_get_shareable_link: {e}") + # err(f"Error in upload_and_get_shareable_link: {e}") # return None @@ -405,7 +412,7 @@ async def load_workflow(workflow_path: str, workflow:str): return json.load(file) -async def update_prompt_and_get_key(workf0ow: dict, post: dict, positive: str): +async def update_prompt_and_get_key(workflow: dict, post: dict, positive: str): ''' Recurses through the workflow searching for and substituting the dynamic values for API_PrePrompt, API_StylePrompt, API_NegativePrompt, width, height, and seed (random integer). Even more important, it finds and returns the key to the filepath where the file is saved, which we need to decipher status when generation is complete. @@ -436,13 +443,13 @@ Even more important, it finds and returns the key to the filepath where the file workflow[key] = random.randint(1000000000000, 9999999999999) elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]: - logger.debug(f"Got a hit for a dimension: {key} {value}") + debug(f"Got a hit for a dimension: {key} {value}") if value == 1023: workflow[key] = post.get("width", 1024) - logger.debug(f"Set {key} to {workflow[key]}.") + debug(f"Set {key} to {workflow[key]}.") elif value == 1025: workflow[key] = post.get("height", 1024) - logger.debug(f"Set {key} to {workflow[key]}.") + debug(f"Set {key} to {workflow[key]}.") update_recursive(workflow) return found_key[0] diff --git a/sijapi/routers/llm.py b/sijapi/routers/llm.py index db10a72..6329fc8 100644 --- a/sijapi/routers/llm.py +++ b/sijapi/routers/llm.py @@ -33,10 +33,15 @@ from sijapi.routers.asr import transcribe_audio llm = APIRouter() logger = L.get_module_logger("llm") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) # Initialize chromadb client client = chromadb.Client() -OBSIDIAN_CHROMADB_COLLECTION = client.create_collection("obsidian") +# OBSIDIAN_CHROMADB_COLLECTION = client.create_collection("obsidian") VISION_MODELS = ["llava-phi3", "moondream", "llava", "llava-llama3", "llava:34b", "llava:13b-v1.5-q8_0"] # Function to read all markdown files in the folder @@ -48,6 +53,7 @@ def read_markdown_files(folder: Path): documents.append(file.read()) return documents, file_paths +reimplement=''' # Read markdown files and generate embeddings documents, file_paths = read_markdown_files(DOC_DIR) for i, doc in enumerate(documents): @@ -57,7 +63,7 @@ for i, doc in enumerate(documents): ids=[file_paths[i]], embeddings=[embedding], documents=[doc] - ) + )''' # Function to retrieve the most relevant document given a prompt @llm.get("/retrieve_document/{prompt}") @@ -89,13 +95,13 @@ async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = DEFAULT_LL LLM = Ollama() response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens}) - logger.debug(response) + debug(response) if "message" in response: if "content" in response["message"]: content = response["message"]["content"] return content else: - logger.debug("No choices found in response") + debug("No choices found in response") return None async def query_ollama_multishot( @@ -116,12 +122,12 @@ async def query_ollama_multishot( LLM = Ollama() response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens}) - logger.debug(response) + debug(response) if "message" in response and "content" in response["message"]: return response["message"]["content"] else: - logger.debug("No content found in response") + debug("No content found in response") return None @@ -140,21 +146,21 @@ async def chat_completions(request: Request): raise HTTPException(status_code=400, detail="Message data is required in the request body.") requested_model = body.get('model', 'default-model') - logger.debug(f"Requested model: {requested_model}") + debug(f"Requested model: {requested_model}") stream = body.get('stream') token_limit = body.get('max_tokens') or body.get('num_predict') # Check if the most recent message contains an image_url recent_message = messages[-1] if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')): - logger.debug("Processing as a vision request") + debug("Processing as a vision request") model = "llava" - logger.debug(f"Using model: {model}") + debug(f"Using model: {model}") return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json") else: - logger.debug("Processing as a standard request") + debug("Processing as a standard request") model = requested_model - logger.debug(f"Using model: {model}") + debug(f"Using model: {model}") if stream: return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json") else: @@ -279,17 +285,17 @@ async def generate_messages(messages: list, model: str = "llama3"): def is_model_available(model_name): model_data = OllamaList() available_models = [model['name'] for model in model_data['models']] - logger.debug(f"Available models: {available_models}") # Log using the configured LOGGER + debug(f"Available models: {available_models}") # Log using the configured LOGGER matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name] if len(matching_models) == 1: - logger.debug(f"Unique match found: {matching_models[0]}") + debug(f"Unique match found: {matching_models[0]}") return True elif len(matching_models) > 1: - logger.error(f"Ambiguous match found, models: {matching_models}") + err(f"Ambiguous match found, models: {matching_models}") return True else: - logger.error(f"No match found for model: {model_name}") + err(f"No match found for model: {model_name}") return False @@ -412,12 +418,12 @@ def query_gpt4(llmPrompt: List = [], system_msg: str = "", user_msg: str = "", m if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"): return first_choice.message.content else: - logger.debug("No content attribute in the first choice's message") - logger.debug(f"No content found in message string: {response.choices}") - logger.debug("Trying again!") + debug("No content attribute in the first choice's message") + debug(f"No content found in message string: {response.choices}") + debug("Trying again!") query_gpt4(messages, max_tokens) else: - logger.debug(f"No content found in message string: {response}") + debug(f"No content found in message string: {response}") return "" def llava(image_base64, prompt): @@ -427,7 +433,7 @@ def llava(image_base64, prompt): prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}", images = [image_base64] ) - logger.debug(response) + debug(response) return "" if "pass" in response["response"].lower() else response["response"] def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150): @@ -458,7 +464,7 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150) comment_content = first_choice.message.content if "PASS" in comment_content: return "" - logger.debug(f"Generated comment: {comment_content}") + debug(f"Generated comment: {comment_content}") response_2 = VISION_LLM.chat.completions.create( model="gpt-4-vision-preview", @@ -496,15 +502,15 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150) first_choice = response_2.choices[0] if first_choice.message and first_choice.message.content: final_content = first_choice.message.content - logger.debug(f"Generated comment: {final_content}") + debug(f"Generated comment: {final_content}") if "PASS" in final_content: return "" else: return final_content - logger.debug("Vision response did not contain expected data.") - logger.debug(f"Vision response: {response_1}") + debug("Vision response did not contain expected data.") + debug(f"Vision response: {response_1}") asyncio.sleep(15) try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens) @@ -562,7 +568,7 @@ async def summarize_tts_endpoint( ) except Exception as e: - logger.error(f"Error in summarize_tts_endpoint: {str(e)}") + err(f"Error in summarize_tts_endpoint: {str(e)}") return JSONResponse( status_code=400, content={"error": str(e)} @@ -589,11 +595,11 @@ async def summarize_tts( bg_tasks = BackgroundTasks() model = await tts.get_model(voice) final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename) - logger.debug(f"summary_tts completed with final_output_path: {final_output_path}") + debug(f"summary_tts completed with final_output_path: {final_output_path}") return final_output_path -async def get_title(text: str, LLM: Ollama() = None): +async def get_title(text: str, LLM = None): LLM = LLM if LLM else Ollama() title = await process_chunk("Generate a title for this text", text, 1, 1, 12, LLM) title = sanitize_filename(title) @@ -605,10 +611,10 @@ def split_text_into_chunks(text: str) -> List[str]: sentences = re.split(r'(?<=[.!?])\s+', text) words = text.split() total_words = len(words) - logger.debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.") + debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.") max_words_per_chunk = int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW) - logger.debug(f"Maximum words per chunk: {max_words_per_chunk}") + debug(f"Maximum words per chunk: {max_words_per_chunk}") chunks = [] current_chunk = [] @@ -628,7 +634,7 @@ def split_text_into_chunks(text: str) -> List[str]: if current_chunk: chunks.append(' '.join(current_chunk)) - logger.debug(f"Split text into {len(chunks)} chunks.") + debug(f"Split text into {len(chunks)} chunks.") return chunks @@ -640,11 +646,11 @@ def calculate_max_tokens(text: str) -> int: async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str: - logger.info(f"Attempting to extract text from file: {file}") + info(f"Attempting to extract text from file: {file}") try: if isinstance(file, UploadFile): - logger.info("File is an UploadFile object") + info("File is an UploadFile object") file_extension = os.path.splitext(file.filename)[1] temp_file_path = tempfile.mktemp(suffix=file_extension) with open(temp_file_path, 'wb') as buffer: @@ -663,7 +669,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_ _, file_ext = os.path.splitext(file_path) file_ext = file_ext.lower() - logger.info(f"File extension: {file_ext}") + info(f"File extension: {file_ext}") if file_ext == '.pdf': text_content = await extract_text_from_pdf(file_path) @@ -690,7 +696,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_ return text_content except Exception as e: - logger.error(f"Error extracting text: {str(e)}") + err(f"Error extracting text: {str(e)}") raise ValueError(f"Error extracting text: {str(e)}") @@ -699,17 +705,17 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_ chunked_text = split_text_into_chunks(text) total_parts = len(chunked_text) - logger.debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}") + debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}") total_words_count = sum(len(chunk.split()) for chunk in chunked_text) - logger.debug(f"Total words count: {total_words_count}") + debug(f"Total words count: {total_words_count}") total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW)) - logger.debug(f"Total tokens count: {total_tokens_count}") + debug(f"Total tokens count: {total_tokens_count}") total_summary_length = length_override if length_override else total_tokens_count // length_quotient - logger.debug(f"Total summary length: {total_summary_length}") + debug(f"Total summary length: {total_summary_length}") corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT) - logger.debug(f"Corrected total summary length: {corrected_total_summary_length}") + debug(f"Corrected total summary length: {corrected_total_summary_length}") summaries = await asyncio.gather(*[ process_chunk(instruction, chunk, i+1, total_parts, LLM=LLM) @@ -720,21 +726,21 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_ summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)] concatenated_summary = ' '.join(summaries) - logger.debug(f"Concatenated summary: {concatenated_summary}") - logger.debug(f"Concatenated summary length: {len(concatenated_summary.split())}") + debug(f"Concatenated summary: {concatenated_summary}") + debug(f"Concatenated summary length: {len(concatenated_summary.split())}") if total_parts > 1: - logger.debug(f"Processing the concatenated_summary to smooth the edges...") + debug(f"Processing the concatenated_summary to smooth the edges...") concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts." final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM) - logger.debug(f"Final summary length: {len(final_summary.split())}") + debug(f"Final summary length: {len(final_summary.split())}") return final_summary else: return concatenated_summary async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str: - # logger.debug(f"Processing chunk: {text}") + # debug(f"Processing chunk: {text}") LLM = LLM if LLM else Ollama() words_count = len(text.split()) @@ -744,14 +750,14 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE) max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH) - logger.debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}") + debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}") if part and total_parts > 1: prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}" else: prompt = f"{instruction}:\n\n{text}" - logger.debug(f"Starting LLM.generate for part {part} of {total_parts}") + info(f"Starting LLM.generate for part {part} of {total_parts}") response = await LLM.generate( model=SUMMARY_MODEL, prompt=prompt, @@ -760,8 +766,8 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int ) text_response = response['response'] - logger.debug(f"Completed LLM.generate for part {part} of {total_parts}") - logger.debug(f"Result: {text_response}") + info(f"Completed LLM.generate for part {part} of {total_parts}") + debug(f"Result: {text_response}") return text_response async def title_and_summary(extracted_text: str): diff --git a/sijapi/routers/loc.py b/sijapi/routers/loc.py index 776f0ef..05cdcc4 100644 --- a/sijapi/routers/loc.py +++ b/sijapi/routers/loc.py @@ -261,11 +261,10 @@ async def generate_map(start_date: datetime, end_date: datetime): return html_content async def post_location(location: Location): - if not location.datetime: - logger.debug(f"location appears to be missing datetime: {location}") - else: - logger.debug(f"post_location called with {location.datetime}") - + # if not location.datetime: + # logger.debug(f"location appears to be missing datetime: {location}") + # else: + # logger.debug(f"post_location called with {location.datetime}") async with DB.get_connection() as conn: try: context = location.context or {} diff --git a/sijapi/routers/news.py b/sijapi/routers/news.py index 605be68..6ce3b3f 100644 --- a/sijapi/routers/news.py +++ b/sijapi/routers/news.py @@ -5,159 +5,178 @@ import asyncio import shutil import requests from bs4 import BeautifulSoup -from zoneinfo import ZoneInfo from urllib.parse import urlparse from datetime import datetime as dt_datetime, timedelta from typing import Optional - import aiohttp import aiofiles import newspaper import trafilatura +from newspaper import Article from readability import Document from markdownify import markdownify as md from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry - -from fastapi import APIRouter, BackgroundTasks, File, UploadFile, Form, HTTPException, Response, Query, Path as FastAPIPath -from fastapi.responses import JSONResponse -from pydantic import BaseModel - +from fastapi import APIRouter, BackgroundTasks, UploadFile, Form, HTTPException, Query, Path as FastAPIPath from pathlib import Path -from sijapi import API, L, Dir, News, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, GEO +from sijapi import L, News, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, DEFAULT_11L_VOICE, DEFAULT_VOICE from sijapi.utilities import sanitize_filename, assemble_journal_path, assemble_archive_path -from sijapi.routers import llm, tts, asr, loc, note +from sijapi.routers import gis, llm, tts, note -from newspaper import Article news = APIRouter() logger = L.get_module_logger("news") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) -async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "summary", voice: str = DEFAULT_11L_VOICE): +async def process_and_save_article( + bg_tasks: BackgroundTasks, + url: str, + title: Optional[str] = None, + tts_mode: str = "summary", + voice: str = DEFAULT_VOICE, + site_name: Optional[str] = None +) -> str: try: - url = article.url - source = trafilatura.fetch_url(url) + # Fetch and parse article + article = await fetch_and_parse_article(url) - if source is None: - # Fallback to newspaper3k if trafilatura fails - article.download() - article.parse() - traf = None - else: - traf = trafilatura.extract_metadata(filecontent=source, default_url=url) - article.download() - article.parse() + # Generate title and file paths + title = sanitize_filename(title or article.title or f"Untitled - {dt_datetime.now().strftime('%Y-%m-%d')}") + markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=title, extension=".md") - # Update article properties, preferring trafilatura data when available - article.title = traf.title if traf and traf.title else article.title or url - article.authors = traf.author if traf and traf.author else article.authors or [] - article.publish_date = traf.date if traf and traf.date else article.publish_date - try: - article.publish_date = await loc.dt(article.publish_date, "UTC") - except: - logger.debug(f"Failed to localize {article.publish_date}") - article.publish_date = await loc.dt(dt_datetime.now(), "UTC") - article.meta_description = traf.description if traf and traf.description else article.meta_description - article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) if source else article.text - article.top_image = traf.image if traf and traf.image else article.top_image - article.source_url = traf.sitename if traf and traf.sitename else urlparse(url).netloc.replace('www.', '').title() - article.meta_keywords = traf.categories or traf.tags if traf else article.meta_keywords or [] - article.meta_keywords = article.meta_keywords if isinstance(article.meta_keywords, list) else [article.meta_keywords] + # Generate summary + summary = await generate_summary(article.text) - if not is_article_within_date_range(article, earliest_date): - return False + # Handle TTS + audio_link = await handle_tts(bg_tasks, article, title, tts_mode, voice, summary) + # Generate markdown content + markdown_content = generate_markdown_content(article, title, summary, audio_link, site_name) - timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M') - readable_title = sanitize_filename(article.title or timestamp) - markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md") + # Save markdown file + await save_markdown_file(markdown_filename, markdown_content) - summary = await llm.summarize_text(article.text, "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.") - summary = summary.replace('\n', ' ') # Remove line breaks - - if tts_mode == "full" or tts_mode == "content": - tts_text = article.text - elif tts_mode == "summary" or tts_mode == "excerpt": - tts_text = summary - else: - tts_text = None - - banner_markdown = '' - try: - banner_url = article.top_image - if banner_url: - banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}.jpg")) - if banner_image: - banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" - except Exception as e: - logger.error(f"No image found in article") - - - authors = ', '.join(['[[{}]]'.format(author.strip()) for author in article.authors if author.strip()]) - if not authors: - authors = '[[Unknown Author]]' - - frontmatter = f"""--- -title: {readable_title} -authors: {authors} -published: {article.publish_date} -added: {timestamp} -banner: "{banner_markdown}" -tags: -""" - frontmatter += '\n'.join(f" - {tag}" for tag in article.meta_keywords) - frontmatter += '\n---\n' - - body = f"# {readable_title}\n\n" - if tts_text: - audio_filename = f"{article.publish_date.strftime('%Y-%m-%d')} {readable_title}" - try: - audio_path = await tts.generate_speech( - bg_tasks=bg_tasks, - text=tts_text, - voice=voice, - model="xtts2", - podcast=True, - title=audio_filename, - output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR - ) - if isinstance(audio_path, Path): - audio_ext = audio_path.suffix - obsidian_link = f"![[{audio_path.name}]]" - body += f"{obsidian_link}\n\n" - else: - logger.warning(f"Unexpected audio_path type: {type(audio_path)}. Value: {audio_path}") - except Exception as e: - logger.error(f"Failed to generate TTS for {audio_filename}. Error: {str(e)}") - logger.error(f"TTS error details - voice: {voice}, model: eleven_turbo_v2, podcast: True") - logger.error(f"Output directory: {Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR}") - - body += f"by {authors} in {article.source_url}\n\n" - body += f"> [!summary]+\n" - body += f"> {summary}\n\n" - body += article.text - - markdown_content = frontmatter + body - - with open(markdown_filename, 'w') as md_file: - md_file.write(markdown_content) - - logger.info(f"Successfully saved to {markdown_filename}") - note.add_to_daily_note(relative_path) - print(f"Saved article: {relative_path}") - return True + # Add to daily note + await note.add_to_daily_note(relative_path) + return f"Successfully saved: {relative_path}" except Exception as e: - logger.error(f"Error processing article from {article.url}: {str(e)}") + err(f"Failed to process article {url}: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) + + +async def fetch_and_parse_article(url: str) -> Article: + source = trafilatura.fetch_url(url) + traf = trafilatura.extract_metadata(filecontent=source, default_url=url) + + article = Article(url) + article.set_html(source) + article.parse() + + # Update article properties with trafilatura data + article.title = article.title or traf.title or url + article.authors = article.authors or (traf.author if isinstance(traf.author, list) else [traf.author]) + article.publish_date = await gis.dt(article.publish_date or traf.date or dt_datetime.now(), "UTC") + article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) or article.text + article.top_image = article.top_image or traf.image + article.source_url = traf.sitename or urlparse(url).netloc.replace('www.', '').title() + article.meta_keywords = list(set(article.meta_keywords or traf.categories or traf.tags or [])) + + return article + +def is_article_within_date_range(article: Article, days_back: int) -> bool: + earliest_date = dt_datetime.now().date() - timedelta(days=days_back) + return article.publish_date.date() >= earliest_date + +async def generate_summary(text: str) -> str: + summary = await llm.summarize_text(text, "Summarize the provided text. Respond with the summary and nothing else.") + return summary.replace('\n', ' ') + +async def handle_tts(bg_tasks: BackgroundTasks, article: Article, title: str, tts_mode: str, voice: str, summary: str) -> Optional[str]: + if tts_mode in ["full", "content"]: + tts_text = article.text + elif tts_mode in ["summary", "excerpt"]: + tts_text = summary + else: + return None + + audio_filename = f"{article.publish_date.strftime('%Y-%m-%d')} {title}" + try: + audio_path = await tts.generate_speech( + bg_tasks=bg_tasks, + text=tts_text, + voice=voice, + model="xtts", + podcast=True, + title=audio_filename, + output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR + ) + return f"![[{Path(audio_path).name}]]" + except HTTPException as e: + err(f"Failed to generate TTS: {str(e)}") + return None + + +def generate_markdown_content(article: Article, title: str, summary: str, audio_link: Optional[str], site_name: Optional[str] = None) -> str: + frontmatter = f"""--- +title: {title} +authors: {', '.join(f'[[{author}]]' for author in article.authors)} +published: {article.publish_date} +added: {dt_datetime.now().strftime('%b %d, %Y at %H:%M')} +banner: "{get_banner_markdown(article.top_image)}" +tags: +{chr(10).join(f' - {tag}' for tag in article.meta_keywords)} +""" + if site_name: + frontmatter += f"site: {site_name}\n" + frontmatter += "---\n\n" + + body = f"# {title}\n\n" + if audio_link: + body += f"{audio_link}\n\n" + body += f"by {', '.join(article.authors)} in [{article.source_url}]({article.url})\n\n" + body += f"> [!summary]+\n> {summary}\n\n" + body += article.text + + return frontmatter + body + + +def get_banner_markdown(image_url: str) -> str: + if not image_url: + return '' + try: + banner_image = download_file(image_url, Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR) + return f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" if banner_image else '' + except Exception as e: + err(f"Failed to download banner image: {str(e)}") + return '' + +async def save_markdown_file(filename: str, content: str): + async with aiofiles.open(filename, 'w', encoding='utf-8') as f: + await f.write(content) + + +async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "off", voice: str = DEFAULT_11L_VOICE): + try: + url = article.url + parsed_article = await fetch_and_parse_article(url) + + if not is_article_within_date_range(parsed_article, earliest_date): + return False + + return await process_and_save_article(bg_tasks, url, None, tts_mode, voice, site_name=site_name) + + except Exception as e: + err(f"Error processing article from {article.url}: {str(e)}") return False -# You'll need to update your is_article_within_date_range function: -def is_article_within_date_range(article, earliest_date): - return article.publish_date is not None and article.publish_date.date() >= earliest_date - async def process_news_site(site, bg_tasks: BackgroundTasks): - print(f"Downloading articles from {site.name}...") + info(f"Downloading articles from {site.name}...") earliest_date = dt_datetime.now().date() - timedelta(days=site.days_back) @@ -179,11 +198,11 @@ async def process_news_site(site, bg_tasks: BackgroundTasks): results = await asyncio.gather(*tasks) articles_downloaded = sum(results) - print(f"Downloaded {articles_downloaded} articles from {site.name}") + info(f"Downloaded {articles_downloaded} articles from {site.name}") except Exception as e: - print(f"Error processing {site.name}: {str(e)}") + err(f"Error processing {site.name}: {str(e)}") + -# Update your news_refresh_endpoint function: @news.get("/news/refresh") async def news_refresh_endpoint(bg_tasks: BackgroundTasks): tasks = [process_news_site(site, bg_tasks) for site in News.sites] @@ -192,32 +211,41 @@ async def news_refresh_endpoint(bg_tasks: BackgroundTasks): async def generate_path(article, site_name): - publish_date = await loc.dt(article.publish_date, 'UTC') if article.publish_date else await loc.dt(dt_datetime.now(), 'UTC') + publish_date = await gis.dt(article.publish_date, 'UTC') if article.publish_date else await gis.dt(dt_datetime.now(), 'UTC') title_slug = "".join(c if c.isalnum() else "_" for c in article.title) filename = f"{site_name} - {title_slug[:50]}.md" absolute_path, relative_path = assemble_journal_path(publish_date, 'Articles', filename, extension='.md', no_timestamp=True) return absolute_path, relative_path + async def save_article_to_file(content, output_path): output_path.parent.mkdir(parents=True, exist_ok=True) async with aiofiles.open(output_path, 'w', encoding='utf-8') as file: await file.write(content) - -### CLIPPER ### @news.post("/clip") async def clip_post( bg_tasks: BackgroundTasks, - url: Optional[str] = Form(None), - source: Optional[str] = Form(None), + url: str = Form(...), title: Optional[str] = Form(None), tts: str = Form('summary'), voice: str = Form(DEFAULT_VOICE), - encoding: str = Form('utf-8') ): - markdown_filename = await process_article(bg_tasks, url, title, encoding, source, tts, voice) - return {"message": "Clip saved successfully", "markdown_filename": markdown_filename} + result = await process_and_save_article(bg_tasks, url, title, tts, voice) + return {"message": "Clip saved successfully", "result": result} + +@news.get("/clip") +async def clip_get( + bg_tasks: BackgroundTasks, + url: str, + tts: str = Query('summary'), + voice: str = Query(DEFAULT_VOICE) +): + result = await process_and_save_article(bg_tasks, url, None, tts, voice) + return {"message": "Clip saved successfully", "result": result} + + @news.post("/archive") async def archive_post( @@ -229,203 +257,6 @@ async def archive_post( markdown_filename = await process_archive(url, title, encoding, source) return {"message": "Clip saved successfully", "markdown_filename": markdown_filename} -@news.get("/clip") -async def clip_get( - bg_tasks: BackgroundTasks, - url: str, - tts: str = Query('summary'), - voice: str = Query(DEFAULT_VOICE) -): - parsed_content = await parse_article(url) - markdown_filename = await process_article2(bg_tasks, parsed_content, tts, voice) - return {"message": "Clip saved successfully", "markdown_filename": markdown_filename} - - - - -async def process_article2( - bg_tasks: BackgroundTasks, - parsed_content: Article, - tts_mode: str = "summary", - voice: str = DEFAULT_11L_VOICE -): - timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M') - - readable_title = sanitize_filename(parsed_content.title or timestamp) - markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md") - - try: - summary = await llm.summarize_text(parsed_content.clean_doc, "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.") - summary = summary.replace('\n', ' ') # Remove line breaks - - if tts_mode == "full" or tts_mode == "content": - tts_text = parsed_content.clean_doc - elif tts_mode == "summary" or tts_mode == "excerpt": - tts_text = summary - else: - tts_text = None - - banner_markdown = '' - try: - banner_url = parsed_content.top_image - if banner_url != '': - banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR)) - if banner_image: - banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" - - except Exception as e: - logger.error(f"No image found in article") - - authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.authors) - published_date = parsed_content.publish_date - frontmatter = f"""--- -title: {readable_title} -authors: {authors} -published: {published_date} -added: {timestamp} -banner: "{banner_markdown}" -tags: - -""" - frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.tags) - frontmatter += '\n---\n' - - body = f"# {readable_title}\n\n" - if tts_text: - audio_filename = f"{published_date} {readable_title}" - try: - audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename, - output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR) - audio_ext = Path(audio_path).suffix - obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]" - body += f"{obsidian_link}\n\n" - except Exception as e: - logger.error(f"Failed to generate TTS for np3k. {e}") - - try: - body += f"by {authors} in {parsed_content.canonical_link}" # update with method for getting the newspaper name - body += f"> [!summary]+\n" - body += f"> {summary}\n\n" - body += parsed_content["content"] - markdown_content = frontmatter + body - - except Exception as e: - logger.error(f"Failed to combine elements of article markdown.") - - try: - with open(markdown_filename, 'w') as md_file: - md_file.write(markdown_content) - - logger.info(f"Successfully saved to {markdown_filename}") - note.add_to_daily_note(relative_path) - return markdown_filename - - except Exception as e: - logger.error(f"Failed to write markdown file") - raise HTTPException(status_code=500, detail=str(e)) - - except Exception as e: - logger.error(f"Failed to clip: {str(e)}") - raise HTTPException(status_code=500, detail=str(e)) - - -async def process_article( - bg_tasks: BackgroundTasks, - url: str, - title: Optional[str] = None, - encoding: str = 'utf-8', - source: Optional[str] = None, - tts_mode: str = "summary", - voice: str = DEFAULT_11L_VOICE -): - - timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M') - - parsed_content = await parse_article(url, source) - if parsed_content is None: - return {"error": "Failed to retrieve content"} - - readable_title = sanitize_filename(title or parsed_content.get("title") or timestamp) - markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md") - - try: - summary = await llm.summarize_text(parsed_content["content"], "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.") - summary = summary.replace('\n', ' ') # Remove line breaks - - if tts_mode == "full" or tts_mode == "content": - tts_text = parsed_content["content"] - elif tts_mode == "summary" or tts_mode == "excerpt": - tts_text = summary - else: - tts_text = None - - banner_markdown = '' - try: - banner_url = parsed_content.get('image', '') - if banner_url != '': - banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR)) - if banner_image: - banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" - - except Exception as e: - logger.error(f"No image found in article") - - authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown'])) - - frontmatter = f"""--- -title: {readable_title} -authors: {', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown']))} -published: {parsed_content.get('date_published', 'Unknown')} -added: {timestamp} -excerpt: {parsed_content.get('excerpt', '')} -banner: "{banner_markdown}" -tags: - -""" - frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.get('tags', [])) - frontmatter += '\n---\n' - - body = f"# {readable_title}\n\n" - - if tts_text: - datetime_str = dt_datetime.now().strftime("%Y%m%d%H%M%S") - audio_filename = f"{datetime_str} {readable_title}" - try: - audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename, - output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR) - audio_ext = Path(audio_path).suffix - obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]" - body += f"{obsidian_link}\n\n" - except Exception as e: - logger.error(f"Failed to generate TTS for np3k. {e}") - - try: - body += f"by {authors} in [{parsed_content.get('domain', urlparse(url).netloc.replace('www.', ''))}]({url}).\n\n" - body += f"> [!summary]+\n" - body += f"> {summary}\n\n" - body += parsed_content["content"] - markdown_content = frontmatter + body - - except Exception as e: - logger.error(f"Failed to combine elements of article markdown.") - - try: - with open(markdown_filename, 'w', encoding=encoding) as md_file: - md_file.write(markdown_content) - - logger.info(f"Successfully saved to {markdown_filename}") - note.add_to_daily_note(relative_path) - return markdown_filename - - except Exception as e: - logger.error(f"Failed to write markdown file") - raise HTTPException(status_code=500, detail=str(e)) - - except Exception as e: - logger.error(f"Failed to clip {url}: {str(e)}") - raise HTTPException(status_code=500, detail=str(e)) - - async def parse_article(url: str, source: Optional[str] = None) -> Article: source = source if source else trafilatura.fetch_url(url) @@ -436,7 +267,7 @@ async def parse_article(url: str, source: Optional[str] = None) -> Article: article.set_html(source) article.parse() - logger.info(f"Parsed {article.title}") + info(f"Parsed {article.title}") # Update or set properties based on trafilatura and additional processing article.title = article.title or traf.title or url @@ -444,10 +275,10 @@ async def parse_article(url: str, source: Optional[str] = None) -> Article: article.publish_date = article.publish_date or traf.date try: - article.publish_date = await loc.dt(article.publish_date, "UTC") + article.publish_date = await gis.dt(article.publish_date, "UTC") except: - logger.debug(f"Failed to localize {article.publish_date}") - article.publish_date = await loc.dt(dt_datetime.now(), "UTC") + debug(f"Failed to localize {article.publish_date}") + article.publish_date = await gis.dt(dt_datetime.now(), "UTC") article.meta_description = article.meta_description or traf.description article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) or article.text @@ -467,7 +298,6 @@ async def parse_article(url: str, source: Optional[str] = None) -> Article: return article - async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]: if source: html_content = source @@ -476,7 +306,7 @@ async def html_to_markdown(url: str = None, source: str = None) -> Optional[str] async with session.get(url) as response: html_content = await response.text() else: - logger.error(f"Unable to convert nothing to markdown.") + err(f"Unable to convert nothing to markdown.") return None # Use readability to extract the main content @@ -525,12 +355,13 @@ async def process_archive( markdown_path.parent.mkdir(parents=True, exist_ok=True) with open(markdown_path, 'w', encoding=encoding) as md_file: md_file.write(markdown_content) - logger.debug(f"Successfully saved to {markdown_path}") + debug(f"Successfully saved to {markdown_path}") return markdown_path except Exception as e: - logger.warning(f"Failed to write markdown file: {str(e)}") + warn(f"Failed to write markdown file: {str(e)}") return None + def download_file(url, folder): os.makedirs(folder, exist_ok=True) filename = str(uuid.uuid4()) + os.path.splitext(urlparse(url).path)[-1] @@ -552,16 +383,17 @@ def download_file(url, folder): with open(filepath, 'wb') as f: f.write(response.content) else: - logger.error(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}") + err(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}") return None else: - logger.error(f"Failed to download image: {url}, status code: {response.status_code}") + err(f"Failed to download image: {url}, status code: {response.status_code}") return None except Exception as e: - logger.error(f"Failed to download image: {url}, error: {str(e)}") + err(f"Failed to download image: {url}, error: {str(e)}") return None return filename + def copy_file(local_path, folder): os.makedirs(folder, exist_ok=True) filename = os.path.basename(local_path) @@ -575,3 +407,188 @@ async def save_file(file: UploadFile, folder: Path) -> Path: with open(file_path, 'wb') as f: shutil.copyfileobj(file.file, f) return file_path + + +deprecated = ''' +async def process_article2( + bg_tasks: BackgroundTasks, + parsed_content: Article, + tts_mode: str = "summary", + voice: str = DEFAULT_11L_VOICE +): + timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M') + + readable_title = sanitize_filename(parsed_content.title or timestamp) + markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md") + + try: + summary = await llm.summarize_text(parsed_content.clean_doc, "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.") + summary = summary.replace('\n', ' ') # Remove line breaks + + if tts_mode == "full" or tts_mode == "content": + tts_text = parsed_content.clean_doc + elif tts_mode == "summary" or tts_mode == "excerpt": + tts_text = summary + else: + tts_text = None + + banner_markdown = '' + try: + banner_url = parsed_content.top_image + if banner_url != '': + banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR)) + if banner_image: + banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" + + except Exception as e: + err(f"No image found in article") + + authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.authors) + published_date = parsed_content.publish_date + frontmatter = f"""--- +title: {readable_title} +authors: {authors} +published: {published_date} +added: {timestamp} +banner: "{banner_markdown}" +tags: + +""" + frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.tags) + frontmatter += '\n---\n' + + body = f"# {readable_title}\n\n" + if tts_text: + audio_filename = f"{published_date} {readable_title}" + try: + audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename, + output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR) + audio_ext = Path(audio_path).suffix + obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]" + body += f"{obsidian_link}\n\n" + except Exception as e: + err(f"Failed to generate TTS for np3k. {e}") + + try: + body += f"by {authors} in {parsed_content.canonical_link}" # update with method for getting the newspaper name + body += f"> [!summary]+\n" + body += f"> {summary}\n\n" + body += parsed_content["content"] + markdown_content = frontmatter + body + + except Exception as e: + err(f"Failed to combine elements of article markdown.") + + try: + with open(markdown_filename, 'w') as md_file: + md_file.write(markdown_content) + + info(f"Successfully saved to {markdown_filename}") + await note.add_to_daily_note(relative_path) + return markdown_filename + + except Exception as e: + err(f"Failed to write markdown file") + raise HTTPException(status_code=500, detail=str(e)) + + except Exception as e: + err(f"Failed to clip: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) + + +async def process_article( + bg_tasks: BackgroundTasks, + url: str, + title: Optional[str] = None, + encoding: str = 'utf-8', + source: Optional[str] = None, + tts_mode: str = "summary", + voice: str = DEFAULT_11L_VOICE +): + + timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M') + + parsed_content = await parse_article(url, source) + if parsed_content is None: + return {"error": "Failed to retrieve content"} + + readable_title = sanitize_filename(title or parsed_content.get("title") or timestamp) + markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md") + + try: + summary = await llm.summarize_text(parsed_content["content"], "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.") + summary = summary.replace('\n', ' ') # Remove line breaks + + if tts_mode == "full" or tts_mode == "content": + tts_text = parsed_content["content"] + elif tts_mode == "summary" or tts_mode == "excerpt": + tts_text = summary + else: + tts_text = None + + banner_markdown = '' + try: + banner_url = parsed_content.get('image', '') + if banner_url != '': + banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR)) + if banner_image: + banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" + + except Exception as e: + err(f"No image found in article") + + authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown'])) + + frontmatter = f"""--- +title: {readable_title} +authors: {', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown']))} +published: {parsed_content.get('date_published', 'Unknown')} +added: {timestamp} +excerpt: {parsed_content.get('excerpt', '')} +banner: "{banner_markdown}" +tags: + +""" + frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.get('tags', [])) + frontmatter += '\n---\n' + + body = f"# {readable_title}\n\n" + + if tts_text: + datetime_str = dt_datetime.now().strftime("%Y%m%d%H%M%S") + audio_filename = f"{datetime_str} {readable_title}" + try: + audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename, + output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR) + audio_ext = Path(audio_path).suffix + obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]" + body += f"{obsidian_link}\n\n" + except Exception as e: + err(f"Failed to generate TTS for np3k. {e}") + + try: + body += f"by {authors} in [{parsed_content.get('domain', urlparse(url).netloc.replace('www.', ''))}]({url}).\n\n" + body += f"> [!summary]+\n" + body += f"> {summary}\n\n" + body += parsed_content["content"] + markdown_content = frontmatter + body + + except Exception as e: + err(f"Failed to combine elements of article markdown.") + + try: + with open(markdown_filename, 'w', encoding=encoding) as md_file: + md_file.write(markdown_content) + + info(f"Successfully saved to {markdown_filename}") + await note.add_to_daily_note(relative_path) + return markdown_filename + + except Exception as e: + err(f"Failed to write markdown file") + raise HTTPException(status_code=500, detail=str(e)) + + except Exception as e: + err(f"Failed to clip {url}: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) +''' \ No newline at end of file diff --git a/sijapi/routers/note.py b/sijapi/routers/note.py index b3770a6..54b5813 100644 --- a/sijapi/routers/note.py +++ b/sijapi/routers/note.py @@ -17,24 +17,28 @@ from fastapi import HTTPException, status from pathlib import Path from fastapi import APIRouter, Query, HTTPException from sijapi import API, L, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, GEO -from sijapi.routers import cal, img, loc, tts, llm, time, weather, asr -from sijapi.utilities import assemble_journal_path, assemble_archive_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, check_file_name, HOURLY_COLUMNS_MAPPING +from sijapi.routers import asr, cal, gis, img, llm, serve, time, tts, weather +from sijapi.utilities import assemble_journal_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, check_file_name, HOURLY_COLUMNS_MAPPING from sijapi.classes import Location note = APIRouter() logger = L.get_module_logger("note") - +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) @note.post("/note/add") async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None), bg_tasks: BackgroundTasks = None): - logger.debug(f"Received request on /note/add...") + debug(f"Received request on /note/add...") if not file and not text: - logger.warning(f"... without any file or text!") + warn(f"... without any file or text!") raise HTTPException(status_code=400, detail="Either text or a file must be provided") else: result = await process_for_daily_note(file, text, source, bg_tasks) - logger.info(f"Result on /note/add: {result}") + info(f"Result on /note/add: {result}") return JSONResponse({"message": "Note added successfully", "entry": result}, status_code=201) @@ -44,7 +48,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text: transcription_entry = "" file_entry = "" if file: - logger.debug("File received...") + debug("File received...") file_content = await file.read() audio_io = BytesIO(file_content) @@ -52,18 +56,18 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text: guessed_type = mimetypes.guess_type(file.filename) file_type = guessed_type[0] if guessed_type[0] else "application/octet-stream" - logger.debug(f"Processing as {file_type}...") + debug(f"Processing as {file_type}...") # Extract the main type (e.g., 'audio', 'image', 'video') main_type = file_type.split('/')[0] subdir = main_type.title() if main_type else "Documents" absolute_path, relative_path = assemble_journal_path(now, subdir=subdir, filename=file.filename) - logger.debug(f"Destination path: {absolute_path}") + debug(f"Destination path: {absolute_path}") with open(absolute_path, 'wb') as f: f.write(file_content) - logger.debug(f"Processing {f.name}...") + debug(f"Processing {f.name}...") if main_type == 'audio': transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6)) @@ -74,7 +78,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text: file_entry = f"[Source]({relative_path})" text_entry = text if text else "" - logger.debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}") + debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}") return await add_to_daily_note(transcription_entry, file_entry, text_entry, now) @@ -169,7 +173,7 @@ added: {timestamp} obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]" body += f"{obsidian_link}\n\n" except Exception as e: - logger.error(f"Failed in the TTS portion of clipping: {e}") + err(f"Failed in the TTS portion of clipping: {e}") body += f"> [!summary]+\n" body += f"> {summary}\n\n" @@ -182,12 +186,12 @@ added: {timestamp} with open(markdown_filename, 'w', encoding=encoding) as md_file: md_file.write(markdown_content) - logger.info(f"Successfully saved to {markdown_filename}") + info(f"Successfully saved to {markdown_filename}") return markdown_filename except Exception as e: - logger.error(f"Failed to clip: {str(e)}") + err(f"Failed to clip: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) def list_and_correct_impermissible_files(root_dir, rename: bool = False): @@ -198,7 +202,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False): if check_file_name(filename): file_path = Path(dirpath) / filename impermissible_files.append(file_path) - logger.debug(f"Impermissible file found: {file_path}") + debug(f"Impermissible file found: {file_path}") # Sanitize the file name new_filename = sanitize_filename(filename) @@ -216,7 +220,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False): # Rename the file if rename: os.rename(file_path, new_file_path) - logger.debug(f"Renamed: {file_path} -> {new_file_path}") + debug(f"Renamed: {file_path} -> {new_file_path}") return impermissible_files @@ -233,7 +237,7 @@ async def build_daily_note_range_endpoint(dt_start: str, dt_end: str): results = [] current_date = start_date while current_date <= end_date: - formatted_date = await loc.dt(current_date) + formatted_date = await gis.dt(current_date) result = await build_daily_note(formatted_date) results.append(result) current_date += timedelta(days=1) @@ -242,6 +246,37 @@ async def build_daily_note_range_endpoint(dt_start: str, dt_end: str): +@note.get("/note/create") +async def build_daily_note_getpoint(): + try: + loc = await gis.get_last_location() + if not loc: + raise ValueError("Unable to retrieve last location") + + tz = await GEO.tz_current(loc) + if not tz: + raise ValueError(f"Unable to determine timezone for location: {loc}") + + date_time = dt_datetime.now(tz) + path = await build_daily_note(date_time, loc.latitude, loc.longitude) + path_str = str(path) + + info(f"Successfully created daily note at {path_str}") + return JSONResponse(content={"path": path_str}, status_code=200) + + except ValueError as ve: + error_msg = f"Value Error in build_daily_note_getpoint: {str(ve)}" + err(error_msg) + raise HTTPException(status_code=400, detail=error_msg) + + except Exception as e: + error_msg = f"Unexpected error in build_daily_note_getpoint: {str(e)}" + err(error_msg) + err(f"Traceback: {traceback.format_exc()}") + raise HTTPException(status_code=500, detail="An unexpected error occurred") + + + @note.post("/note/create") async def build_daily_note_endpoint( date_str: Optional[str] = Form(dt_datetime.now().strftime("%Y-%m-%d")), @@ -258,10 +293,10 @@ async def build_daily_note_endpoint( else: raise ValueError("Location is not provided or invalid.") except (ValueError, AttributeError, TypeError) as e: - logger.warning(f"Falling back to localized datetime due to error: {e}") + warn(f"Falling back to localized datetime due to error: {e}") try: - date_time = await loc.dt(date_str) - places = await loc.fetch_locations(date_time) + date_time = await gis.dt(date_str) + places = await gis.fetch_locations(date_time) lat, lon = places[0].latitude, places[0].longitude except Exception as e: return JSONResponse(content={"error": str(e)}, status_code=400) @@ -278,14 +313,14 @@ async def build_daily_note(date_time: dt_datetime, lat: float = None, lon: float Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses the sijapi configuration file to place the daily note and does NOT presently interface with Obsidian's daily note or periodic notes extensions. It is your responsibility to ensure they match. ''' absolute_path, _ = assemble_journal_path(date_time) - logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.") + warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.") formatted_day = date_time.strftime("%A %B %d, %Y") # Monday May 27, 2024 formatting day_before = (date_time - timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-26 Sunday formatting day_after = (date_time + timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-28 Tuesday formatting header = f"# [[{day_before}|← ]] {formatted_day} [[{day_after}| →]]\n\n" if not lat or not lon: - places = await loc.fetch_locations(date_time) + places = await gis.fetch_locations(date_time) lat, lon = places[0].latitude, places[0].longitude location = await GEO.code((lat, lon)) @@ -308,6 +343,10 @@ Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses th _, note_path = assemble_journal_path(date_time, filename="Notes", extension=".md", no_timestamp = True) note_embed = f"![[{note_path}]]" + _, map_path = assemble_journal_path(date_time, filename="Map", extension=".png", no_timestamp = True) + map = await gis.generate_and_save_heatmap(date_time, output_path=map_path) + map_embed = f"![[{map_path}]]" + _, banner_path = assemble_journal_path(date_time, filename="Banner", extension=".jpg", no_timestamp = True) body = f"""--- @@ -320,6 +359,7 @@ created: "{dt_datetime.now().strftime("%Y-%m-%d %H:%M:%S")}" {header} {weather_embed} +{map_path} ## Events {event_embed} @@ -369,7 +409,7 @@ async def update_frontmatter(date_time: dt_datetime, key: str, value: str): # Check if the file exists if not file_path.exists(): - logger.critical(f"Markdown file not found at {file_path}") + crit(f"Markdown file not found at {file_path}") raise HTTPException(status_code=404, detail="Markdown file not found.") # Read the file @@ -416,32 +456,29 @@ async def banner_endpoint(dt: str, location: str = None, forecast: str = None, m ''' Endpoint (POST) that generates a new banner image for the Obsidian daily note for a specified date, taking into account optional additional information, then updates the frontmatter if necessary. ''' - logger.debug(f"banner_endpoint requested with date: {dt} ({type(dt)})") - date_time = await loc.dt(dt) - logger.debug(f"date_time after localization: {date_time} ({type(date_time)})") + debug(f"banner_endpoint requested with date: {dt} ({type(dt)})") + date_time = await gis.dt(dt) + debug(f"date_time after localization: {date_time} ({type(date_time)})") context = await generate_context(dt, location, forecast, mood, other_context) jpg_path = await generate_banner(date_time, location, mood=mood, other_context=other_context) return jpg_path async def generate_banner(dt, location: Location = None, forecast: str = None, mood: str = None, other_context: str = None): - # logger.debug(f"Location: {location}, forecast: {forecast}, mood: {mood}, other_context: {other_context}") - date_time = await loc.dt(dt) - logger.debug(f"generate_banner called with date_time: {date_time}") + date_time = await gis.dt(dt) destination_path, local_path = assemble_journal_path(date_time, filename="Banner", extension=".jpg", no_timestamp = True) - logger.debug(f"destination path generated: {destination_path}") if not location or not isinstance(location, Location): - locations = await loc.fetch_locations(date_time) + locations = await gis.fetch_locations(date_time) if locations: location = locations[0] if not forecast: forecast = await update_dn_weather(date_time, False, location.latitude, location.longitude) prompt = await generate_context(date_time, location, forecast, mood, other_context) - logger.debug(f"Prompt: {prompt}") + debug(f"Prompt: {prompt}") final_path = await img.workflow(prompt, scene=OBSIDIAN_BANNER_SCENE, destination_path=destination_path) if not str(local_path) in str(final_path): - logger.info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}") + info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}") jpg_embed = f"\"![[{local_path}]]\"" await update_frontmatter(date_time, "banner", jpg_embed) return local_path @@ -469,7 +506,7 @@ async def generate_context(date_time, location: Location, forecast: str, mood: s if geocoded_location.display_name or geocoded_location.city or geocoded_location.country: return await generate_context(date_time, geocoded_location, forecast, mood, other_context) else: - logger.warning(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.") + warn(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.") elif location and isinstance(location, str): display_name = f"Location: {location}\n" else: @@ -507,7 +544,7 @@ async def generate_context(date_time, location: Location, forecast: str, mood: s async def get_note(date_time: dt_datetime): - date_time = await loc.dt(date_time); + date_time = await gis.dt(date_time); absolute_path, local_path = assemble_journal_path(date_time, filename = "Notes", extension = ".md", no_timestamp = True) if absolute_path.is_file(): @@ -536,9 +573,9 @@ async def note_weather_get( ): force_refresh_weather = refresh == "True" try: - date_time = dt_datetime.now() if date == "0" else await loc.dt(date) - logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.") - logger.debug(f"date: {date} .. date_time: {date_time}") + date_time = dt_datetime.now() if date == "0" else await gis.dt(date) + warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.") + debug(f"date: {date} .. date_time: {date_time}") content = await update_dn_weather(date_time, force_refresh_weather) #, lat, lon) return JSONResponse(content={"forecast": content}, status_code=200) @@ -546,14 +583,14 @@ async def note_weather_get( return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code) except Exception as e: - logger.error(f"Error in note_weather_get: {str(e)}") + err(f"Error in note_weather_get: {str(e)}") raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") @note.post("/update/note/{date}") async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refresh: str="False") -> PlainTextResponse: - date_time = await loc.dt(date) - logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.") + date_time = await gis.dt(date) + warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.") force_refresh_weather = refresh == "True" await update_dn_weather(date_time, force_refresh_weather) await update_daily_note_events(date_time) @@ -561,52 +598,52 @@ async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refres return f"[Refresh]({API.URL}/update/note/{date_time.strftime('%Y-%m-%d')}" async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False, lat: float = None, lon: float = None): - logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.") + warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.") try: if lat and lon: place = await GEO.code((lat, lon)) else: - logger.debug(f"Updating weather for {date_time}") - places = await loc.fetch_locations(date_time) + debug(f"Updating weather for {date_time}") + places = await gis.fetch_locations(date_time) place = places[0] lat = place.latitude lon = place.longitude - logger.debug(f"lat: {lat}, lon: {lon}, place: {place}") + debug(f"lat: {lat}, lon: {lon}, place: {place}") city = GEO.find_override_location(lat, lon) if city: - logger.info(f"Using override location: {city}") + info(f"Using override location: {city}") else: if place.city and place.city != "": city = place.city - logger.info(f"City in data: {city}") + info(f"City in data: {city}") else: location = await GEO.code((lat, lon)) - logger.debug(f"location: {location}") + debug(f"location: {location}") city = location.name city = city if city else location.city city = city if city else location.house_number + ' ' + location.road - logger.debug(f"City geocoded: {city}") + debug(f"City geocoded: {city}") # Assemble journal path absolute_path, relative_path = assemble_journal_path(date_time, filename="Weather", extension=".md", no_timestamp = True) - logger.debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}") + debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}") try: - logger.debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather") + debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather") day = await weather.get_weather(date_time, lat, lon, force_refresh) - logger.debug(f"day information obtained from get_weather: {day}") + debug(f"day information obtained from get_weather: {day}") if day: DailyWeather = day.get('DailyWeather') HourlyWeather = day.get('HourlyWeather') if DailyWeather: - # logger.debug(f"Day: {DailyWeather}") + # debug(f"Day: {DailyWeather}") icon = DailyWeather.get('icon') - logger.debug(f"Icon: {icon}") + debug(f"Icon: {icon}") weather_icon, admonition = get_icon_and_admonition(icon) if icon else (":LiSunMoon:", "ad-weather") @@ -675,38 +712,38 @@ async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False, detailed_forecast += assemble_hourly_data_table(times, condition_symbols, temps, winds) detailed_forecast += f"```\n\n" - logger.debug(f"Detailed forecast: {detailed_forecast}.") + debug(f"Detailed forecast: {detailed_forecast}.") with open(absolute_path, 'w', encoding='utf-8') as note_file: note_file.write(detailed_forecast) - logger.debug(f"Operation complete.") + debug(f"Operation complete.") return narrative else: - logger.error(f"Failed to get DailyWeather from day: {day}") + err(f"Failed to get DailyWeather from day: {day}") else: - logger.error(f"Failed to get day") + err(f"Failed to get day") raise HTTPException(status_code=500, detail="Failed to retrieve weather data") except HTTPException as e: - logger.error(f"HTTP error: {e}") - logger.error(traceback.format_exc()) + err(f"HTTP error: {e}") + err(traceback.format_exc()) raise e except Exception as e: - logger.error(f"Error: {e}") - logger.error(traceback.format_exc()) + err(f"Error: {e}") + err(traceback.format_exc()) raise HTTPException(status_code=999, detail=f"Error: {e}") except ValueError as ve: - logger.error(f"Value error in update_dn_weather: {str(ve)}") - logger.error(traceback.format_exc()) + err(f"Value error in update_dn_weather: {str(ve)}") + err(traceback.format_exc()) raise HTTPException(status_code=400, detail=f"Value error: {str(ve)}") except Exception as e: - logger.error(f"Error in update_dn_weather: {str(e)}") - logger.error(traceback.format_exc()) + err(f"Error in update_dn_weather: {str(e)}") + err(traceback.format_exc()) raise HTTPException(status_code=500, detail=f"Error in update_dn_weather: {str(e)}") def format_hourly_time(hour): @@ -714,8 +751,8 @@ def format_hourly_time(hour): hour_12 = convert_to_12_hour_format(hour.get("datetime")) return hour_12 except Exception as e: - logger.error(f"Error in format_hourly_time: {str(e)}") - logger.error(traceback.format_exc()) + err(f"Error in format_hourly_time: {str(e)}") + err(traceback.format_exc()) return "" def format_hourly_icon(hour, sunrise, sunset): @@ -725,7 +762,7 @@ def format_hourly_icon(hour, sunrise, sunset): precip = hour.get('precip', float(0.0)) precip_prob = hour.get('precipprob', float(0.0)) - logger.debug(f"precip: {precip}, prob: {precip_prob}") + debug(f"precip: {precip}, prob: {precip_prob}") sp_str = None @@ -749,8 +786,8 @@ def format_hourly_icon(hour, sunrise, sunset): return formatted except Exception as e: - logger.error(f"Error in format_hourly_special: {str(e)}") - logger.error(traceback.format_exc()) + err(f"Error in format_hourly_special: {str(e)}") + err(traceback.format_exc()) return "" def format_hourly_temperature(hour): @@ -758,8 +795,8 @@ def format_hourly_temperature(hour): temp_str = f"{hour.get('temp', '')}˚ F" return temp_str except Exception as e: - logger.error(f"Error in format_hourly_temperature: {str(e)}") - logger.error(traceback.format_exc()) + err(f"Error in format_hourly_temperature: {str(e)}") + err(traceback.format_exc()) return "" def format_hourly_wind(hour): @@ -769,8 +806,8 @@ def format_hourly_wind(hour): wind_str = f"{str(windspeed)}:LiWind: {winddir}" return wind_str except Exception as e: - logger.error(f"Error in format_hourly_wind: {str(e)}") - logger.error(traceback.format_exc()) + err(f"Error in format_hourly_wind: {str(e)}") + err(traceback.format_exc()) return "" def assemble_hourly_data_table(times, condition_symbols, temps, winds): @@ -783,7 +820,7 @@ def assemble_hourly_data_table(times, condition_symbols, temps, winds): def get_icon_and_admonition(icon_str) -> Tuple: - logger.debug(f"Received request for emoji {icon_str}") + debug(f"Received request for emoji {icon_str}") if icon_str.startswith(":") and icon_str.endswith(":"): return icon_str @@ -884,7 +921,7 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s total_events = len(event_data["events"]) event_markdown = f"```ad-events" for event in event_data["events"]: - logger.debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}") + debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}") if not event['name'].startswith('TC '): url = f"hook://ical/eventID={event['uid']}calendarID=17" if event['url']: @@ -957,23 +994,23 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s @note.get("/note/events", response_class=PlainTextResponse) async def note_events_endpoint(date: str = Query(None)): - date_time = await loc.dt(date) if date else await loc.dt(dt_datetime.now()) + date_time = await gis.dt(date) if date else await gis.dt(dt_datetime.now()) response = await update_daily_note_events(date_time) return PlainTextResponse(content=response, status_code=200) async def update_daily_note_events(date_time: dt_datetime): - logger.debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}") + debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}") try: events = await cal.get_events(date_time, date_time) - logger.debug(f"Raw events: {events}") + debug(f"Raw events: {events}") event_data = { "date": date_time.strftime('%Y-%m-%d'), "events": events } events_markdown = await format_events_as_markdown(event_data) - logger.debug(f"Markdown events: {events_markdown}") + debug(f"Markdown events: {events_markdown}") absolute_path, _ = assemble_journal_path(date_time, filename="Events", extension=".md", no_timestamp = True) - logger.debug(f"Writing events to file: {absolute_path}") + debug(f"Writing events to file: {absolute_path}") with open(absolute_path, 'w', encoding='utf-8') as note_file: note_file.write(events_markdown) @@ -981,7 +1018,7 @@ async def update_daily_note_events(date_time: dt_datetime): return events_markdown except Exception as e: - logger.error(f"Error processing events: {e}") + err(f"Error processing events: {e}") raise HTTPException(status_code=500, detail=str(e)) diff --git a/sijapi/routers/rag.py b/sijapi/routers/rag.py index d4a616f..1269b94 100644 --- a/sijapi/routers/rag.py +++ b/sijapi/routers/rag.py @@ -8,6 +8,11 @@ from sijapi import L rag = APIRouter() logger = L.get_module_logger("rag") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) rag.get("/rag/search") async def rag_search_endpoint(query: str, scope: str): diff --git a/sijapi/routers/scrape.py b/sijapi/routers/scrape.py new file mode 100644 index 0000000..290ea4b --- /dev/null +++ b/sijapi/routers/scrape.py @@ -0,0 +1,177 @@ +import asyncio +import json +import re +from fastapi import APIRouter, HTTPException +from typing import Dict, List, Any +import aiohttp +import PyPDF2 +import io +from bs4 import BeautifulSoup +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from pathlib import Path +from sijapi import Scrape, L, Dir + +logger = L.get_module_logger('scrape') +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) + +scrape = APIRouter() + +# Ensure Dir.DATA is a Path object +Dir.DATA = Path(Dir.DATA).expanduser() + +def save_to_json(data: List[Dict], output_file: str): + output_path = Dir.DATA / output_file + info(f"Saving data to {output_path}") + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w') as f: + json.dump(data, f, indent=2) + info(f"Data saved successfully to {output_path}") + +def load_from_json(output_file: str) -> List[Dict]: + output_path = Dir.DATA / output_file + info(f"Loading data from {output_path}") + try: + with open(output_path, 'r') as f: + return json.load(f) + except FileNotFoundError: + warn(f"File {output_path} not found") + return [] + +async def fetch_content(config: Any) -> str: + info(f"Fetching content from {config.url}") + if config.content.js_render: + return await fetch_with_selenium(config.url) + + async with aiohttp.ClientSession() as session: + async with session.get(config.url) as response: + if config.content.type == 'pdf': + return await handle_pdf(response) + elif config.content.type in ['html', 'xml']: + return await handle_html_xml(response, config.content.selector) + elif config.content.type == 'json': + return await handle_json(response) + elif config.content.type == 'txt': + return await response.text() + else: + warn(f"Unsupported content type: {config.content.type}") + return await response.text() + +async def fetch_with_selenium(url: str) -> str: + options = Options() + options.add_argument("--headless") + driver = webdriver.Chrome(options=options) + driver.get(url) + content = driver.page_source + driver.quit() + return content + +async def handle_pdf(response): + pdf_content = await response.read() + pdf_file = io.BytesIO(pdf_content) + pdf_reader = PyPDF2.PdfReader(pdf_file) + return "\n".join(page.extract_text() for page in pdf_reader.pages) + +async def handle_html_xml(response, selector): + content = await response.text() + soup = BeautifulSoup(content, 'html.parser') + if selector: + return soup.select_one(selector).get_text() + return soup.get_text() + +async def handle_json(response): + return await response.json() + +def apply_processing_step(data: Any, step: Any) -> Any: + info(f"Applying processing step: {step.type}") + if step.type == 'regex_split': + return re.split(step.pattern, data)[1:] + elif step.type == 'keyword_filter': + return [item for item in data if any(keyword.lower() in str(item).lower() for keyword in step.keywords)] + elif step.type == 'regex_extract': + if isinstance(data, list): + return [apply_regex_extract(item, step.extractions) for item in data] + return apply_regex_extract(data, step.extractions) + debug(f"Unknown processing step type: {step.type}") + return data + +def apply_regex_extract(text: str, extractions: List[Any]) -> Dict: + debug(f"Applying regex extraction on text of length {len(text)}") + result = {} + for extraction in extractions: + extraction_dict = extraction.dict() if hasattr(extraction, 'dict') else extraction + flags = sum(getattr(re, flag.upper()) for flag in extraction_dict.get('flags', [])) + + pattern = extraction_dict['pattern'] + matches = re.findall(pattern, text, flags=flags) + if matches: + if extraction_dict.get('all_matches', False): + if extraction_dict.get('group_names'): + result[extraction_dict['name']] = [dict(zip(extraction_dict['group_names'], match)) for match in matches] + else: + result[extraction_dict['name']] = matches + else: + result[extraction_dict['name']] = matches[-1].strip() # Take the last match + + debug(f"Extracted {len(result)} items") + return result + +def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]: + info("Applying post-processing steps") + for step in post_processing: + if step.type == 'custom': + data = globals()[step.function](data) + return data + +def data_has_changed(new_data: List[Dict], old_data: List[Dict]) -> bool: + return new_data != old_data + +@scrape.get("/scrape/{config_name}") +async def scrape_site(config_name: str): + info(f"Starting scrape operation for {config_name}") + + if not hasattr(Scrape, 'configurations'): + # If 'configurations' doesn't exist, assume the entire Scrape object is the configuration + config = Scrape if Scrape.name == config_name else None + else: + config = next((c for c in Scrape.configurations if c.name == config_name), None) + + if not config: + raise HTTPException(status_code=404, detail=f"Configuration '{config_name}' not found") + + raw_data = await fetch_content(config) + processed_data = raw_data + + for step in config.processing: + processed_data = apply_processing_step(processed_data, step) + + processed_data = apply_post_processing(processed_data, config.post_processing) + + # Resolve Dir.DATA in the output file path + output_file = config.output_file.replace('{{ Dir.DATA }}', str(Dir.DATA)) + previous_data = load_from_json(output_file) + + if data_has_changed(processed_data, previous_data): + save_to_json(processed_data, output_file) + info("Scrape completed with updates") + return {"message": "Site updated", "data": processed_data} + else: + info("Scrape completed with no updates") + return {"message": "No updates", "data": processed_data} + +def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]: + info("Applying post-processing steps") + for step in post_processing: + if step.type == 'regex_extract': + for entry in data: + if step.field in entry: + matches = re.findall(step.pattern, entry[step.field]) + if step.all_matches: + entry[step.output_field] = [step.format.format(*match) for match in matches] + elif matches: + entry[step.output_field] = step.format.format(*matches[0]) + return data diff --git a/sijapi/routers/serve.py b/sijapi/routers/serve.py index 1ecd4a0..3e6684f 100644 --- a/sijapi/routers/serve.py +++ b/sijapi/routers/serve.py @@ -28,16 +28,22 @@ from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from sijapi import ( - L, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, + L, API, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, - MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, DATA_DIR, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR + MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR ) +from sijapi.classes import WidgetUpdate from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path -from sijapi.routers import loc, note - +from sijapi.routers import gis serve = APIRouter(tags=["public"]) + logger = L.get_module_logger("serve") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.err(text) +def crit(text: str): logger.critical(text) @serve.get("/pgp") async def get_pgp(): @@ -51,7 +57,6 @@ def serve_image(image_name: str): else: return {"error": "Image not found"} - def construct_journal_path(date_str: str) -> Path: try: date_obj = datetime.strptime(date_str, '%Y-%m-%d') @@ -67,19 +72,20 @@ def is_valid_date(date_str: str) -> bool: except ValueError: return False + @serve.get("/notes/{file_path:path}") async def get_file_endpoint(file_path: str): try: - date_time = await loc.dt(file_path); + date_time = await gis.dt(file_path); absolute_path, local_path = assemble_journal_path(date_time, no_timestamp = True) except ValueError as e: - logger.debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path") + debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path") absolute_path = OBSIDIAN_VAULT_DIR / file_path if not absolute_path.suffix: absolute_path = Path(absolute_path.with_suffix(".md")) if not absolute_path.is_file(): - logger.warning(f"{absolute_path} is not a valid file it seems.") + warn(f"{absolute_path} is not a valid file it seems.") elif absolute_path.suffix == '.md': try: with open(absolute_path, 'r', encoding='utf-8') as file: @@ -93,19 +99,6 @@ async def get_file_endpoint(file_path: str): raise HTTPException(status_code=400, detail="Unsupported file type") -with open(CASETABLE_PATH, 'r') as file: - CASETABLE = json.load(file) - -class WidgetUpdate(BaseModel): - text: Optional[str] = None - progress: Optional[str] = None - icon: Optional[str] = None - color: Optional[str] = None - url: Optional[str] = None - shortcut: Optional[str] = None - graph: Optional[str] = None - - @serve.get("/health_check") def hook_health(): shellfish_health_check() @@ -130,50 +123,33 @@ async def hook_changedetection(webhook_data: dict): if message and any(word in message.split() for word in ["SPI", "sierra", "pacific"]): filename = ALERTS_DIR / f"alert_{int(time.time())}.json" filename.write_text(json.dumps(webhook_data, indent=4)) - notify(message) return {"status": "received"} - -@serve.post("/cl/search") -async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks): - client_ip = request.client.host - logger.debug(f"Received request from IP: {client_ip}") - data = await request.json() - payload = data['payload'] - results = data['payload']['results'] - - # Save the payload data - timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") - payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_search.json" - with open(payload_file, 'w') as file: - json.dump(payload, file, indent=2) - - for result in results: - bg_tasks.add_task(cl_search_process_result, result) - return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK) - -@serve.post("/cl/docket") -async def hook_cl_docket(request: Request): - client_ip = request.client.host - logger.debug(f"Received request from IP: {client_ip}") - data = await request.json() - await cl_docket(data, client_ip) - async def notify(alert: str): + fail = True try: - await notify_shellfish(alert) + if API.EXTENSIONS.shellfish == "on" or API.EXTENSIONS.shellfish == True: + await notify_shellfish(alert) + fail = False - if TS_ID == MAC_ID: - await notify_local(alert) - else: - await notify_remote(f"{MAC_ID}.{TS_TAILNET}.net", alert, MAC_UN, MAC_PW) - - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to send alert: {str(e)}") + if API.EXTENSIONS.macnotify == "on" or API.EXTENSIONS.macnotify == True: + if TS_ID == MAC_ID: + await notify_local(alert) + fail = False + else: + await notify_remote(f"{MAC_ID}.{TS_TAILNET}.net", alert, MAC_UN, MAC_PW) + fail = False + except: + fail = True - return {"message": alert} + if fail == False: + info(f"Delivered alert: {alert}") + return {"message": alert} + else: + crit(f"Failed to deliver alert: {alert}") + return {"message": f"Failed to deliver alert: {alert}"} async def notify_local(message: str): await asyncio.to_thread(os.system, f'osascript -e \'display notification "{message}" with title "Notification Title"\'') @@ -194,228 +170,256 @@ async def notify_remote(host: str, message: str, username: str = None, password: ssh.close() -async def notify_shellfish(alert: str): - key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b" - user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm" - iv = "ab5bbeb426015da7eedcee8bee3dffb7" - - plain = "Secure ShellFish Notify 2.0\n" + base64.b64encode(alert.encode()).decode() + "\n" +if API.EXTENSIONS.shellfish == "on" or API.EXTENSIONS.shellfish == True: + async def notify_shellfish(alert: str): + key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b" + user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm" + iv = "ab5bbeb426015da7eedcee8bee3dffb7" + + plain = "Secure ShellFish Notify 2.0\n" + base64.b64encode(alert.encode()).decode() + "\n" - openssl_command = [ - "openssl", "enc", "-aes-256-cbc", "-base64", "-K", key, "-iv", iv - ] - - process = await asyncio.to_thread(subprocess.Popen, openssl_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = await asyncio.to_thread(process.communicate, plain.encode()) - - if process.returncode != 0: - raise Exception(f"OpenSSL encryption failed: {stderr.decode()}") + openssl_command = [ + "openssl", "enc", "-aes-256-cbc", "-base64", "-K", key, "-iv", iv + ] + + process = await asyncio.to_thread(subprocess.Popen, openssl_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = await asyncio.to_thread(process.communicate, plain.encode()) + + if process.returncode != 0: + raise Exception(f"OpenSSL encryption failed: {stderr.decode()}") - base64_encoded = stdout.decode().strip() + base64_encoded = stdout.decode().strip() - url = f"https://secureshellfish.app/push/?user={user}&mutable" - headers = {"Content-Type": "text/plain"} - async with aiohttp.ClientSession() as session: - async with session.post(url, headers=headers, data=base64_encoded) as response: - if response.status != 200: - raise Exception(f"Failed to send notification: {response.status_code}") - -## SHELLFISH ## -def shellfish_health_check(): - addresses = [ - "https://api.sij.ai/health", - "http://100.64.64.20:4444/health", - "http://100.64.64.30:4444/health", - "http://100.64.64.11:4444/health", - "http://100.64.64.15:4444/health" - ] - - results = [] - up_count = 0 - for address in addresses: + url = f"https://secureshellfish.app/push/?user={user}&mutable" + headers = {"Content-Type": "text/plain"} + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers, data=base64_encoded) as response: + if response.status != 200: + raise Exception(f"Failed to send notification: {response.status_code}") + + def shellfish_health_check(): + addresses = [ + "https://api.sij.ai/health", + "http://100.64.64.20:4444/health", + "http://100.64.64.30:4444/health", + "http://100.64.64.11:4444/health", + "http://100.64.64.15:4444/health" + ] + + results = [] + up_count = 0 + for address in addresses: + try: + response = requests.get(address) + if response.status_code == 200: + results.append(f"{address} is up") + up_count += 1 + else: + results.append(f"{address} returned status code {response.status_code}") + except requests.exceptions.RequestException: + results.append(f"{address} is down") + + # Generate a simple text-based graph + graph = '|' * up_count + '.' * (len(addresses) - up_count) + text_update = "\n".join(results) + + widget_command = ["widget", "--text", text_update, "--text", f"Graph: {graph}", "--icon", "network"] + output = shellfish_run_widget_command(widget_command) + return {"output": output, "graph": graph} + + + def shellfish_update_widget(update: WidgetUpdate): + widget_command = ["widget"] + + if update.text: + widget_command.extend(["--text", update.text]) + if update.progress: + widget_command.extend(["--progress", update.progress]) + if update.icon: + widget_command.extend(["--icon", update.icon]) + if update.color: + widget_command.extend(["--color", update.color]) + if update.url: + widget_command.extend(["--url", update.url]) + if update.shortcut: + widget_command.extend(["--shortcut", update.shortcut]) + if update.graph: + widget_command.extend(["--text", update.graph]) + + output = shellfish_run_widget_command(widget_command) + return {"output": output} + + + def shellfish_run_widget_command(args: List[str]): + result = subprocess.run(args, capture_output=True, text=True, shell=True) + if result.returncode != 0: + raise HTTPException(status_code=500, detail=result.stderr) + return result.stdout + + +if API.EXTENSIONS.courtlistener == "on" or API.EXTENSIONS.courtlistener == True: + with open(CASETABLE_PATH, 'r') as file: + CASETABLE = json.load(file) + + @serve.post("/cl/search") + async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks): + client_ip = request.client.host + debug(f"Received request from IP: {client_ip}") + data = await request.json() + payload = data['payload'] + results = data['payload']['results'] + + # Save the payload data + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_search.json" + with open(payload_file, 'w') as file: + json.dump(payload, file, indent=2) + + for result in results: + bg_tasks.add_task(cl_search_process_result, result) + return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK) + + @serve.post("/cl/docket") + async def hook_cl_docket(request: Request): + client_ip = request.client.host + debug(f"Received request from IP: {client_ip}") + data = await request.json() + await cl_docket(data, client_ip) + + async def cl_docket(data, client_ip, bg_tasks: BackgroundTasks): + payload = data['payload'] + results = data['payload']['results'] + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_docket.json" + with open(payload_file, 'w') as file: + json.dump(payload, file, indent=2) + + for result in results: + bg_tasks.add_task(cl_docket_process, result) + return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK) + + async def cl_docket_process(result): + async with httpx.AsyncClient() as session: + await cl_docket_process_result(result, session) + + async def cl_docket_process_result(result, session): + docket = str(result.get('docket')) + case_code, case_shortname = cl_case_details(docket) + date_filed = result.get('date_filed', 'No Date Filed') + try: - response = requests.get(address) - if response.status_code == 200: - results.append(f"{address} is up") - up_count += 1 + date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d') + except ValueError: + date_filed_formatted = 'NoDateFiled' + + # Fetching court docket information from the API + url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}" + headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'} + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers) as response: + if response.status == 200: + debug(f"Fetching CourtListener docket information for {docket}...") + data = await response.json() + court_docket = data['results'][0]['docket_number_core'] + court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number + case_name = data['results'][0]['case_name'] + debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.") + else: + debug("Failed to fetch data from CourtListener API.") + court_docket = 'NoCourtDocket' + case_name = 'NoCaseName' + + for document in result.get('recap_documents', []): + filepath_ia = document.get('filepath_ia') + filepath_local = document.get('filepath_local') + + if filepath_ia: + file_url = filepath_ia + debug(f"Found IA file at {file_url}.") + elif filepath_local: + file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}" + debug(f"Found local file at {file_url}.") else: - results.append(f"{address} returned status code {response.status_code}") - except requests.exceptions.RequestException: - results.append(f"{address} is down") - - # Generate a simple text-based graph - graph = '|' * up_count + '.' * (len(addresses) - up_count) - text_update = "\n".join(results) - - widget_command = ["widget", "--text", text_update, "--text", f"Graph: {graph}", "--icon", "network"] - output = shellfish_run_widget_command(widget_command) - return {"output": output, "graph": graph} + debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.") + continue + + document_number = document.get('document_number', 'NoDocumentNumber') + description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_") + description = description[:50] # Truncate description + # case_shortname = case_name # TEMPORARY OVERRIDE + file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf" + target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name + target_path.parent.mkdir(parents=True, exist_ok=True) + await cl_download_file(file_url, target_path, session) + debug(f"Downloaded {file_name} to {target_path}") + + def cl_case_details(docket): + case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"}) + case_code = case_info.get("code") + short_name = case_info.get("shortname") + return case_code, short_name + + async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession = None): + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36' + } + async with aiohttp.ClientSession() as session: + debug(f"Attempting to download {url} to {path}.") + try: + async with session.get(url, headers=headers, allow_redirects=True) as response: + if response.status == 403: + err(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.") + return + response.raise_for_status() + + # Check if the response content type is a PDF + content_type = response.headers.get('Content-Type') + if content_type != 'application/pdf': + err(f"Invalid content type: {content_type}. Skipping download.") + return + + # Create an in-memory buffer to store the downloaded content + buffer = io.BytesIO() + async for chunk in response.content.iter_chunked(1024): + buffer.write(chunk) + + # Reset the buffer position to the beginning + buffer.seek(0) + + # Validate the downloaded PDF content + try: + PdfReader(buffer) + except Exception as e: + err(f"Invalid PDF content: {str(e)}. Skipping download.") + return + + # If the PDF is valid, write the content to the file on disk + path.parent.mkdir(parents=True, exist_ok=True) + with path.open('wb') as file: + file.write(buffer.getvalue()) + + except Exception as e: + err(f"Error downloading file: {str(e)}") -def shellfish_update_widget(update: WidgetUpdate): - widget_command = ["widget"] + async def cl_search_process_result(result): + async with httpx.AsyncClient() as session: + download_url = result.get('download_url') + court_id = result.get('court_id') + case_name_short = result.get('caseNameShort') + case_name = result.get('caseName') + debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}") - if update.text: - widget_command.extend(["--text", update.text]) - if update.progress: - widget_command.extend(["--progress", update.progress]) - if update.icon: - widget_command.extend(["--icon", update.icon]) - if update.color: - widget_command.extend(["--color", update.color]) - if update.url: - widget_command.extend(["--url", update.url]) - if update.shortcut: - widget_command.extend(["--shortcut", update.shortcut]) - if update.graph: - widget_command.extend(["--text", update.graph]) + court_folder = court_id - output = shellfish_run_widget_command(widget_command) - return {"output": output} - - -def shellfish_run_widget_command(args: List[str]): - result = subprocess.run(args, capture_output=True, text=True, shell=True) - if result.returncode != 0: - raise HTTPException(status_code=500, detail=result.stderr) - return result.stdout - - -### COURTLISTENER FUNCTIONS ### -async def cl_docket(data, client_ip, bg_tasks: BackgroundTasks): - payload = data['payload'] - results = data['payload']['results'] - timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") - payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_docket.json" - with open(payload_file, 'w') as file: - json.dump(payload, file, indent=2) - - for result in results: - bg_tasks.add_task(cl_docket_process, result) - return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK) - -async def cl_docket_process(result): - async with httpx.AsyncClient() as session: - await cl_docket_process_result(result, session) - -async def cl_docket_process_result(result, session): - docket = str(result.get('docket')) - case_code, case_shortname = cl_case_details(docket) - date_filed = result.get('date_filed', 'No Date Filed') - - try: - date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d') - except ValueError: - date_filed_formatted = 'NoDateFiled' - - # Fetching court docket information from the API - url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}" - headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'} - async with aiohttp.ClientSession() as session: - async with session.get(url, headers=headers) as response: - if response.status == 200: - logger.debug(f"Fetching CourtListener docket information for {docket}...") - data = await response.json() - court_docket = data['results'][0]['docket_number_core'] - court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number - case_name = data['results'][0]['case_name'] - logger.debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.") + if case_name_short: + case_folder = case_name_short else: - logger.debug("Failed to fetch data from CourtListener API.") - court_docket = 'NoCourtDocket' - case_name = 'NoCaseName' + case_folder = case_name - for document in result.get('recap_documents', []): - filepath_ia = document.get('filepath_ia') - filepath_local = document.get('filepath_local') + file_name = download_url.split('/')[-1] + target_path = Path(COURTLISTENER_SEARCH_DIR) / court_folder / case_folder / file_name + target_path.parent.mkdir(parents=True, exist_ok=True) - if filepath_ia: - file_url = filepath_ia - logger.debug(f"Found IA file at {file_url}.") - elif filepath_local: - file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}" - logger.debug(f"Found local file at {file_url}.") - else: - logger.debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.") - continue - - document_number = document.get('document_number', 'NoDocumentNumber') - description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_") - description = description[:50] # Truncate description - # case_shortname = case_name # TEMPORARY OVERRIDE - file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf" - target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name - target_path.parent.mkdir(parents=True, exist_ok=True) - await cl_download_file(file_url, target_path, session) - logger.debug(f"Downloaded {file_name} to {target_path}") - -def cl_case_details(docket): - case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"}) - case_code = case_info.get("code") - short_name = case_info.get("shortname") - return case_code, short_name - -async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession = None): - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36' - } - async with aiohttp.ClientSession() as session: - logger.debug(f"Attempting to download {url} to {path}.") - try: - async with session.get(url, headers=headers, allow_redirects=True) as response: - if response.status == 403: - logger.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.") - return - response.raise_for_status() - - # Check if the response content type is a PDF - content_type = response.headers.get('Content-Type') - if content_type != 'application/pdf': - logger.error(f"Invalid content type: {content_type}. Skipping download.") - return - - # Create an in-memory buffer to store the downloaded content - buffer = io.BytesIO() - async for chunk in response.content.iter_chunked(1024): - buffer.write(chunk) - - # Reset the buffer position to the beginning - buffer.seek(0) - - # Validate the downloaded PDF content - try: - PdfReader(buffer) - except Exception as e: - logger.error(f"Invalid PDF content: {str(e)}. Skipping download.") - return - - # If the PDF is valid, write the content to the file on disk - path.parent.mkdir(parents=True, exist_ok=True) - with path.open('wb') as file: - file.write(buffer.getvalue()) - - except Exception as e: - logger.error(f"Error downloading file: {str(e)}") - - -async def cl_search_process_result(result): - async with httpx.AsyncClient() as session: - download_url = result.get('download_url') - court_id = result.get('court_id') - case_name_short = result.get('caseNameShort') - case_name = result.get('caseName') - logger.debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}") - - court_folder = court_id - - if case_name_short: - case_folder = case_name_short - else: - case_folder = case_name - - file_name = download_url.split('/')[-1] - target_path = Path(COURTLISTENER_SEARCH_DIR) / court_folder / case_folder / file_name - target_path.parent.mkdir(parents=True, exist_ok=True) - - await cl_download_file(download_url, target_path, session) - logger.debug(f"Downloaded {file_name} to {target_path}") + await cl_download_file(download_url, target_path, session) + debug(f"Downloaded {file_name} to {target_path}") diff --git a/sijapi/routers/signal.py b/sijapi/routers/signal.py index 35b0556..1e21e41 100644 --- a/sijapi/routers/signal.py +++ b/sijapi/routers/signal.py @@ -7,25 +7,27 @@ from semaphore import Bot, ChatContext from sijapi import L signal = APIRouter() + logger = L.get_module_logger("signal") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) + async def echo(ctx: ChatContext) -> None: if not ctx.message.empty(): await ctx.message.typing_started() await ctx.message.reply(ctx.message.get_body()) await ctx.message.typing_stopped() - async def main() -> None: """Start the bot.""" - # Connect the bot to number. async with Bot(os.environ["SIGNAL_PHONE_NUMBER"]) as bot: bot.register_handler("", echo) - - # Run the bot until you press Ctrl-C. await bot.start() - if __name__ == '__main__': import anyio anyio.run(main) \ No newline at end of file diff --git a/sijapi/routers/time.py b/sijapi/routers/time.py index 26e1497..5f1672d 100644 --- a/sijapi/routers/time.py +++ b/sijapi/routers/time.py @@ -26,11 +26,18 @@ from collections import defaultdict from dotenv import load_dotenv from traceback import format_exc from sijapi import L, TIMING_API_KEY, TIMING_API_URL -from sijapi.routers import loc +from sijapi.routers import gis time = APIRouter(tags=["private"]) logger = L.get_module_logger("time") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) + + script_directory = os.path.dirname(os.path.abspath(__file__)) # Configuration constants @@ -58,17 +65,17 @@ async def post_time_entry_to_timing(entry: Dict): 'Accept': 'application/json', 'X-Time-Zone': 'America/Los_Angeles' } - logger.debug(f"Received entry: {entry}") + debug(f"Received entry: {entry}") response = None # Initialize response try: async with httpx.AsyncClient() as client: response = await client.post(url, headers=headers, json=entry) response.raise_for_status() # This will only raise for 4xx and 5xx responses except httpx.HTTPStatusError as exc: - logger.debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}") + debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}") raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text)) except Exception as exc: - logger.debug(f"General exception caught: {exc}") + debug(f"General exception caught: {exc}") raise HTTPException(status_code=500, detail="An unexpected error occurred") if response: @@ -97,8 +104,8 @@ def truncate_project_title(title): async def fetch_and_prepare_timing_data(start: datetime, end: Optional[datetime] = None) -> List[Dict]: - # start_date = await loc.dt(start) - # end_date = await loc.dt(end) if end else None + # start_date = await gis.dt(start) + # end_date = await gis.dt(end) if end else None # Adjust the start date to include the day before and format the end date start_date_adjusted = (start - timedelta(days=1)).strftime("%Y-%m-%dT00:00:00") end_date_formatted = f"{datetime.strftime(end, '%Y-%m-%d')}T23:59:59" if end else f"{datetime.strftime(start, '%Y-%m-%d')}T23:59:59" @@ -312,8 +319,8 @@ async def get_timing_markdown3( ): # Fetch and process timing data - start = await loc.dt(start_date) - end = await loc.dt(end_date) if end_date else None + start = await gis.dt(start_date) + end = await gis.dt(end_date) if end_date else None timing_data = await fetch_and_prepare_timing_data(start, end) # Retain these for processing Markdown data with the correct timezone @@ -372,8 +379,8 @@ async def get_timing_markdown( start: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"), end: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}") ): - start_date = await loc.dt(start) - end_date = await loc.dt(end) + start_date = await gis.dt(start) + end_date = await gis.dt(end) markdown_formatted_data = await process_timing_markdown(start_date, end_date) return Response(content=markdown_formatted_data, media_type="text/markdown") @@ -441,8 +448,8 @@ async def get_timing_json( ): # Fetch and process timing data - start = await loc.dt(start_date) - end = await loc.dt(end_date) + start = await gis.dt(start_date) + end = await gis.dt(end_date) timing_data = await fetch_and_prepare_timing_data(start, end) # Convert processed data to the required JSON structure diff --git a/sijapi/routers/tts.py b/sijapi/routers/tts.py index 2af272a..3a3951c 100644 --- a/sijapi/routers/tts.py +++ b/sijapi/routers/tts.py @@ -31,6 +31,12 @@ from sijapi.utilities import sanitize_filename ### INITIALIZATIONS ### tts = APIRouter(tags=["trusted", "private"]) logger = L.get_module_logger("tts") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) + DEVICE = torch.device('cpu') MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2" @@ -47,7 +53,7 @@ async def list_11l_voices(): async with httpx.AsyncClient() as client: try: response = await client.get(url, headers=headers) - logger.debug(f"Response: {response}") + debug(f"Response: {response}") if response.status_code == 200: voices_data = response.json().get("voices", []) formatted_list = "" @@ -57,7 +63,7 @@ async def list_11l_voices(): formatted_list += f"{name}: `{id}`\n" except Exception as e: - logger.error(f"Error determining voice ID: {str(e)}") + err(f"Error determining voice ID: {str(e)}") return PlainTextResponse(formatted_list, status_code=200) @@ -67,18 +73,18 @@ async def select_voice(voice_name: str) -> str: try: # Case Insensitive comparison voice_name_lower = voice_name.lower() - logger.debug(f"Looking for {voice_name_lower}") + debug(f"Looking for {voice_name_lower}") for item in VOICE_DIR.iterdir(): - logger.debug(f"Checking {item.name.lower()}") + debug(f"Checking {item.name.lower()}") if item.name.lower() == f"{voice_name_lower}.wav": - logger.debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.") + debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.") return str(item) - logger.error(f"Voice file not found") + err(f"Voice file not found") raise HTTPException(status_code=404, detail="Voice file not found") except Exception as e: - logger.error(f"Voice file not found: {str(e)}") + err(f"Voice file not found: {str(e)}") return None @@ -113,8 +119,8 @@ async def generate_speech_endpoint( else: return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast) except Exception as e: - logger.error(f"Error in TTS: {str(e)}") - logger.error(traceback.format_exc()) + err(f"Error in TTS: {str(e)}") + err(traceback.format_exc()) raise HTTPException(status_code=666, detail="error in TTS") async def generate_speech( @@ -136,18 +142,20 @@ async def generate_speech( model = model if model else await get_model(voice, voice_file) title = title if title else "TTS audio" output_path = output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav" + if model == "eleven_turbo_v2": - logger.info("Using ElevenLabs.") + info("Using ElevenLabs.") audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir) - else: # if model == "xtts": - logger.info("Using XTTS2") + else: # if model == "xtts": + info("Using XTTS2") audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_path) - #else: - # raise HTTPException(status_code=400, detail="Invalid model specified") - if podcast == True: - podcast_path = Path(PODCAST_DIR) / audio_file_path.name - logger.debug(f"Podcast path: {podcast_path}") + if not audio_file_path: + raise ValueError("TTS generation failed: audio_file_path is empty or None") + + if podcast: + podcast_path = Path(PODCAST_DIR) / Path(audio_file_path).name + debug(f"Podcast path: {podcast_path}") shutil.copy(str(audio_file_path), str(podcast_path)) bg_tasks.add_task(os.remove, str(audio_file_path)) return str(podcast_path) @@ -155,11 +163,12 @@ async def generate_speech( return str(audio_file_path) except Exception as e: - logger.error(f"Failed to generate speech: {str(e)}") + err(f"Failed to generate speech: {str(e)}") raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}") + async def get_model(voice: str = None, voice_file: UploadFile = None): if voice_file or (voice and await select_voice(voice)): return "xtts" @@ -171,7 +180,7 @@ async def get_model(voice: str = None, voice_file: UploadFile = None): raise HTTPException(status_code=400, detail="No model or voice specified") async def determine_voice_id(voice_name: str) -> str: - logger.debug(f"Searching for voice id for {voice_name}") + debug(f"Searching for voice id for {voice_name}") hardcoded_voices = { "alloy": "E3A1KVbKoWSIKSZwSUsW", @@ -188,23 +197,23 @@ async def determine_voice_id(voice_name: str) -> str: if voice_name in hardcoded_voices: voice_id = hardcoded_voices[voice_name] - logger.debug(f"Found voice ID - {voice_id}") + debug(f"Found voice ID - {voice_id}") return voice_id - logger.debug(f"Requested voice not among the hardcoded options.. checking with 11L next.") + debug(f"Requested voice not among the hardcoded options.. checking with 11L next.") url = "https://api.elevenlabs.io/v1/voices" headers = {"xi-api-key": ELEVENLABS_API_KEY} async with httpx.AsyncClient() as client: try: response = await client.get(url, headers=headers) - logger.debug(f"Response: {response}") + debug(f"Response: {response}") if response.status_code == 200: voices_data = response.json().get("voices", []) for voice in voices_data: if voice_name == voice["voice_id"] or voice_name == voice["name"]: return voice["voice_id"] except Exception as e: - logger.error(f"Error determining voice ID: {str(e)}") + err(f"Error determining voice ID: {str(e)}") # as a last fallback, rely on David Attenborough return "b42GBisbu9r5m5n6pHF7" @@ -248,7 +257,7 @@ async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> s async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str: if voice: - logger.debug(f"Looking for voice: {voice}") + debug(f"Looking for voice: {voice}") selected_voice = await select_voice(voice) return selected_voice elif voice_file and isinstance(voice_file, UploadFile): @@ -277,7 +286,7 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) return str(new_file) else: - logger.debug(f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}") + debug(f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}") selected_voice = await select_voice(DEFAULT_VOICE) return selected_voice @@ -315,7 +324,7 @@ async def local_tts( for i, segment in enumerate(segments): segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav" - logger.debug(f"Segment file path: {segment_file_path}") + debug(f"Segment file path: {segment_file_path}") # Run TTS in a separate thread await asyncio.to_thread( @@ -326,7 +335,7 @@ async def local_tts( speaker_wav=[voice_file_path], language="en" ) - logger.debug(f"Segment file generated: {segment_file_path}") + debug(f"Segment file generated: {segment_file_path}") # Load and combine audio in a separate thread segment_audio = await asyncio.to_thread(AudioSegment.from_wav, str(segment_file_path)) @@ -401,7 +410,7 @@ def split_text(text, target_length=35, max_length=50): if segment_length + len(sentence_words) > max_length: segments.append(' '.join(current_segment)) - logger.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}") + debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}") current_segment = [sentence] else: @@ -409,7 +418,7 @@ def split_text(text, target_length=35, max_length=50): if current_segment: segments.append(' '.join(current_segment)) - logger.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}") + debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}") return segments @@ -421,7 +430,7 @@ def clean_text_for_tts(text: str) -> str: text = re.sub(r'\s+', ' ', text).strip() return text else: - logger.debug(f"No text received.") + debug(f"No text received.") diff --git a/sijapi/routers/weather.py b/sijapi/routers/weather.py index 0b6a1d8..c920753 100644 --- a/sijapi/routers/weather.py +++ b/sijapi/routers/weather.py @@ -13,10 +13,15 @@ from shapely.wkb import loads from binascii import unhexlify from sijapi import L, VISUALCROSSING_API_KEY, TZ, DB, GEO from sijapi.utilities import haversine -from sijapi.routers import loc +from sijapi.routers import gis weather = APIRouter() logger = L.get_module_logger("weather") +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) @weather.get("/weather/refresh", response_class=JSONResponse) async def get_refreshed_weather( @@ -26,16 +31,16 @@ async def get_refreshed_weather( # date = await date try: if latlon == "None": - date_time = await loc.dt(date) - place = await loc.fetch_last_location_before(date_time) + date_time = await gis.dt(date) + place = await gis.fetch_last_location_before(date_time) lat = place.latitude lon = place.longitude else: lat, lon = latlon.split(',') tz = await GEO.tz_at(lat, lon) - date_time = await loc.dt(date, tz) + date_time = await gis.dt(date, tz) - logger.debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather") + debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather") day = await get_weather(date_time, lat, lon, force_refresh=True) day_str = str(day) return JSONResponse(content={"weather": day_str}, status_code=200) @@ -44,20 +49,18 @@ async def get_refreshed_weather( return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code) except Exception as e: - logger.error(f"Error in note_weather_get: {str(e)}") + err(f"Error in note_weather_get: {str(e)}") raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") async def get_weather(date_time: dt_datetime, latitude: float, longitude: float, force_refresh: bool = False): - logger.debug(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}") - logger.warning(f"Using {date_time} as our datetime in get_weather.") fetch_new_data = True if force_refresh == False: daily_weather_data = await get_weather_from_db(date_time, latitude, longitude) if daily_weather_data: try: - logger.debug(f"Daily weather data from db: {daily_weather_data}") + debug(f"Daily weather data from db: {daily_weather_data}") last_updated = str(daily_weather_data['DailyWeather'].get('last_updated')) - last_updated = await loc.dt(last_updated) + last_updated = await gis.dt(last_updated) stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location')) stored_loc = loads(stored_loc_data) stored_lat = stored_loc.y @@ -65,68 +68,64 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float, stored_ele = stored_loc.z hourly_weather = daily_weather_data.get('HourlyWeather') - - logger.debug(f"Hourly: {hourly_weather}") - - logger.debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n") - + # debug(f"Hourly: {hourly_weather}") request_haversine = haversine(latitude, longitude, stored_lat, stored_lon) - logger.debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}") + debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}") if last_updated and (date_time <= dt_datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0: - logger.debug(f"We can use existing data... :')") + debug(f"We can use existing data... :')") fetch_new_data = False except Exception as e: - logger.error(f"Error in get_weather: {e}") + err(f"Error in get_weather: {e}") if fetch_new_data: - logger.debug(f"We require new data!") + debug(f"We require new data!") request_date_str = date_time.strftime("%Y-%m-%d") - logger.warning(f"Using {date_time.strftime('%Y-%m-%d')} as our datetime for fetching new data.") + debug(f"Using {date_time.strftime('%Y-%m-%d')} as our datetime for fetching new data.") url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}" try: async with AsyncClient() as client: response = await client.get(url) if response.status_code == 200: - logger.debug(f"Successfully obtained data from VC...") + debug(f"Successfully obtained data from VC...") try: weather_data = response.json() store_result = await store_weather_to_db(date_time, weather_data) if store_result == "SUCCESS": - logger.debug(f"New weather data for {request_date_str} stored in database...") + debug(f"New weather data for {request_date_str} stored in database...") else: - logger.error(f"Failed to store weather data for {request_date_str} in database! {store_result}") + err(f"Failed to store weather data for {request_date_str} in database! {store_result}") - logger.debug(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}") + debug(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}") daily_weather_data = await get_weather_from_db(date_time, latitude, longitude) if daily_weather_data is not None: return daily_weather_data else: raise HTTPException(status_code=500, detail="Weather data was not properly stored.") except Exception as e: - logger.error(f"Problem parsing VC response or storing data: {e}") + err(f"Problem parsing VC response or storing data: {e}") raise HTTPException(status_code=500, detail="Weather data was not properly stored.") else: - logger.error(f"Failed to fetch weather data: {response.status_code}, {response.text}") + err(f"Failed to fetch weather data: {response.status_code}, {response.text}") except Exception as e: - logger.error(f"Exception during API call: {e}") + err(f"Exception during API call: {e}") return daily_weather_data async def store_weather_to_db(date_time: dt_datetime, weather_data: dict): - logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in store_weather_to_db") + warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in store_weather_to_db") async with DB.get_connection() as conn: try: day_data = weather_data.get('days')[0] - logger.debug(f"RAW DAY_DATA: {day_data}") + debug(f"RAW DAY_DATA: {day_data}") # Handle preciptype and stations as PostgreSQL arrays preciptype_array = day_data.get('preciptype', []) or [] stations_array = day_data.get('stations', []) or [] date_str = date_time.strftime("%Y-%m-%d") - logger.warning(f"Using {date_str} in our query in store_weather_to_db.") + warn(f"Using {date_str} in our query in store_weather_to_db.") # Get location details from weather data if available longitude = weather_data.get('longitude') @@ -135,11 +134,11 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict): elevation = await GEO.elevation(latitude, longitude) location_point = f"POINTZ({longitude} {latitude} {elevation})" if longitude and latitude and elevation else None - logger.warning(f"Uncorrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}") - day_data['datetime'] = await loc.dt(day_data.get('datetimeEpoch')) - day_data['sunrise'] = await loc.dt(day_data.get('sunriseEpoch')) - day_data['sunset'] = await loc.dt(day_data.get('sunsetEpoch')) - logger.warning(f"Corrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}") + warn(f"Uncorrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}") + day_data['datetime'] = await gis.dt(day_data.get('datetimeEpoch')) + day_data['sunrise'] = await gis.dt(day_data.get('sunriseEpoch')) + day_data['sunset'] = await gis.dt(day_data.get('sunsetEpoch')) + warn(f"Corrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}") daily_weather_params = ( day_data.get('sunrise'), day_data.get('sunriseEpoch'), @@ -163,7 +162,7 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict): location_point ) except Exception as e: - logger.error(f"Failed to prepare database query in store_weather_to_db! {e}") + err(f"Failed to prepare database query in store_weather_to_db! {e}") try: daily_weather_query = ''' @@ -183,11 +182,11 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict): daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params) if 'hours' in day_data: - logger.debug(f"Processing hours now...") + debug(f"Processing hours now...") for hour_data in day_data['hours']: try: await asyncio.sleep(0.01) - hour_data['datetime'] = await loc.dt(hour_data.get('datetimeEpoch')) + hour_data['datetime'] = await gis.dt(hour_data.get('datetimeEpoch')) hour_preciptype_array = hour_data.get('preciptype', []) or [] hour_stations_array = hour_data.get('stations', []) or [] hourly_weather_params = ( @@ -229,22 +228,22 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict): ''' async with conn.transaction(): hourly_weather_id = await conn.fetchval(hourly_weather_query, *hourly_weather_params) - logger.debug(f"Done processing hourly_weather_id {hourly_weather_id}") + debug(f"Done processing hourly_weather_id {hourly_weather_id}") except Exception as e: - logger.error(f"EXCEPTION: {e}") + err(f"EXCEPTION: {e}") except Exception as e: - logger.error(f"EXCEPTION: {e}") + err(f"EXCEPTION: {e}") return "SUCCESS" except Exception as e: - logger.error(f"Error in dailyweather storage: {e}") + err(f"Error in dailyweather storage: {e}") async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude: float): - logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.") + warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.") async with DB.get_connection() as conn: query_date = date_time.date() try: @@ -260,19 +259,19 @@ async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude daily_weather_record = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude) if daily_weather_record is None: - logger.debug(f"No daily weather data retrieved from database.") + debug(f"No daily weather data retrieved from database.") return None # Convert asyncpg.Record to a mutable dictionary daily_weather_data = dict(daily_weather_record) - # logger.debug(f"Daily weather data prior to tz corrections: {daily_weather_data}") + # debug(f"Daily weather data prior to tz corrections: {daily_weather_data}") # Now we can modify the dictionary # tz = await GEO.tz_at(latitude, longitude) - # daily_weather_data['datetime'] = await loc.dt(daily_weather_data.get('datetime'), tz) - # daily_weather_data['sunrise'] = await loc.dt(daily_weather_data.get('sunrise'), tz) - # daily_weather_data['sunset'] = await loc.dt(daily_weather_data.get('sunset'), tz) + # daily_weather_data['datetime'] = await gis.dt(daily_weather_data.get('datetime'), tz) + # daily_weather_data['sunrise'] = await gis.dt(daily_weather_data.get('sunrise'), tz) + # daily_weather_data['sunset'] = await gis.dt(daily_weather_data.get('sunset'), tz) - # logger.debug(f"Daily weather data after tz corrections: {daily_weather_data}") + # debug(f"Daily weather data after tz corrections: {daily_weather_data}") # Query to get hourly weather data query = ''' @@ -285,17 +284,17 @@ async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude hourly_weather_data = [] for record in hourly_weather_records: hour_data = dict(record) - # hour_data['datetime'] = await loc.dt(hour_data.get('datetime'), tz) + # hour_data['datetime'] = await gis.dt(hour_data.get('datetime'), tz) hourly_weather_data.append(hour_data) - # logger.debug(f"Hourly weather data after tz corrections: {hourly_weather_data}") + # debug(f"Hourly weather data after tz corrections: {hourly_weather_data}") day = { 'DailyWeather': daily_weather_data, 'HourlyWeather': hourly_weather_data, } - # logger.debug(f"day: {day}") + # debug(f"day: {day}") return day except Exception as e: - logger.error(f"Unexpected error occurred: {e}") + err(f"Unexpected error occurred: {e}") diff --git a/sijapi/utilities.py b/sijapi/utilities.py index c47de22..73f9971 100644 --- a/sijapi/utilities.py +++ b/sijapi/utilities.py @@ -30,17 +30,22 @@ from fastapi.security.api_key import APIKeyHeader from sijapi import L, API, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR logger = L.get_module_logger('utilities') +def debug(text: str): logger.debug(text) +def info(text: str): logger.info(text) +def warn(text: str): logger.warning(text) +def err(text: str): logger.error(text) +def crit(text: str): logger.critical(text) api_key_header = APIKeyHeader(name="Authorization", auto_error=False) def validate_api_key(request: Request, api_key: str = Depends(api_key_header)): if request.url.path in API.PUBLIC: - return + return client_ip = ipaddress.ip_address(request.client.host) trusted_subnets = [ipaddress.ip_network(subnet) for subnet in API.TRUSTED_SUBNETS] if any(client_ip in subnet for subnet in trusted_subnets): - return + return # Check header-based API key if api_key: @@ -63,18 +68,18 @@ def assemble_archive_path(filename: str, extension: str = ".md", date_time: date day = date_time.strftime(DAY_FMT) day_short = date_time.strftime(DAY_SHORT_FMT) timestamp = date_time.strftime("%H%M%S") - + # Ensure the extension is preserved base_name, ext = os.path.splitext(filename) extension = ext if ext else extension - + # Initial sanitization sanitized_base = sanitize_filename(base_name, '') filename = f"{day_short} {timestamp} {sanitized_base}{extension}" - + relative_path = Path(year) / month / day / filename absolute_path = ARCHIVE_DIR / relative_path - + # Ensure the total path length doesn't exceed MAX_PATH_LENGTH while len(str(absolute_path)) > MAX_PATH_LENGTH: # Truncate the sanitized_base, not the full filename @@ -82,7 +87,7 @@ def assemble_archive_path(filename: str, extension: str = ".md", date_time: date filename = f"{day_short} {timestamp} {sanitized_base}{extension}" relative_path = Path(year) / month / day / filename absolute_path = ARCHIVE_DIR / relative_path - + return absolute_path, relative_path @@ -111,17 +116,17 @@ def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str extension = extension if extension.startswith(".") else f".{extension}" else: extension = validate_extension(filename, [".md", ".m4a", ".wav", ".aiff", ".flac", ".mp3", ".mp4", ".pdf", ".js", ".json", ".yaml", ".py"]) or ".md" - + filename = sanitize_filename(filename) filename = f"{day_short} {filename}" if no_timestamp else f"{day_short} {timestamp} {filename}" filename = f"{filename}{extension}" if not filename.endswith(extension) else filename relative_path = relative_path / filename - + else: - logger.debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.") + debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.") return None, None - - absolute_path = OBSIDIAN_VAULT_DIR / relative_path + + absolute_path = OBSIDIAN_VAULT_DIR / relative_path os.makedirs(absolute_path.parent, exist_ok=True) return absolute_path, relative_path @@ -142,8 +147,8 @@ def f(file): if hasattr(file, 'read') and callable(file.read): return file if isinstance(file, (bytes, bytearray)): - return file - + return file + if isinstance(file, Path): file_path = file elif isinstance(file, str): @@ -165,16 +170,16 @@ def get_extension(file): file_path = Path(file.filename) file_extension = file_path.suffix return file_extension - + except Exception as e: - logger.error(f"Unable to get extension of {file}") + err(f"Unable to get extension of {file}") raise e def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LENGTH): """Sanitize a string to be used as a safe filename while protecting the file extension.""" - logger.debug(f"Filename before sanitization: {text}") + debug(f"Filename before sanitization: {text}") text = re.sub(r'\s+', ' ', text) sanitized = re.sub(ALLOWED_FILENAME_CHARS, '', text) @@ -186,7 +191,7 @@ def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LE base_name = base_name[:max_base_length - 5].rstrip() final_filename = base_name + extension - logger.debug(f"Filename after sanitization: {final_filename}") + debug(f"Filename after sanitization: {final_filename}") return final_filename @@ -196,16 +201,16 @@ def check_file_name(file_name, max_length=255): needs_sanitization = False if len(file_name) > max_length: - logger.debug(f"Filename exceeds maximum length of {max_length}: {file_name}") + debug(f"Filename exceeds maximum length of {max_length}: {file_name}") needs_sanitization = True if re.search(ALLOWED_FILENAME_CHARS, file_name): - logger.debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}") + debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}") needs_sanitization = True if re.search(r'\s{2,}', file_name): - logger.debug(f"Filename contains multiple consecutive spaces: {file_name}") + debug(f"Filename contains multiple consecutive spaces: {file_name}") needs_sanitization = True if file_name != file_name.strip(): - logger.debug(f"Filename has leading or trailing spaces: {file_name}") + debug(f"Filename has leading or trailing spaces: {file_name}") needs_sanitization = True return needs_sanitization @@ -230,7 +235,7 @@ async def extract_text(file_path: str) -> str: """Extract text from file.""" if file_path.endswith('.pdf'): return await extract_text_from_pdf(file_path) - + elif file_path.endswith('.docx'): return await extract_text_from_docx(file_path) @@ -248,13 +253,13 @@ async def ocr_pdf(file_path: str) -> str: texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, image) for image in images)) return ' '.join(texts) except Exception as e: - logger.error(f"Error during OCR: {str(e)}") + err(f"Error during OCR: {str(e)}") return "" async def extract_text_from_pdf(file_path: str) -> str: if not await is_valid_pdf(file_path): - logger.error(f"Invalid PDF file: {file_path}") + err(f"Invalid PDF file: {file_path}") return "" text = '' @@ -267,12 +272,12 @@ async def extract_text_from_pdf(file_path: str) -> str: text_content = page.extract_text() + ' ' if page.extract_text() else '' text += text_content num_pages = len(reader.pages) - + # If text was extracted successfully and it's deemed sufficient, return it if text and not should_use_ocr(text, num_pages): return clean_text(text) except Exception as e: - logger.error(f"Error extracting text with PyPDF2: {str(e)}") + err(f"Error extracting text with PyPDF2: {str(e)}") # If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six try: @@ -280,10 +285,10 @@ async def extract_text_from_pdf(file_path: str) -> str: if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages): return clean_text(text_pdfminer) except Exception as e: - logger.error(f"Error extracting text with pdfminer.six: {e}") + err(f"Error extracting text with pdfminer.six: {e}") # If both methods fail or are deemed insufficient, use OCR as the last resort - logger.debug("Falling back to OCR for text extraction...") + debug("Falling back to OCR for text extraction...") return await ocr_pdf(file_path) async def is_valid_pdf(file_path: str) -> bool: @@ -292,12 +297,12 @@ async def is_valid_pdf(file_path: str) -> bool: kind = filetype.guess(file_path) return kind.mime == 'application/pdf' except Exception as e: - logger.error(f"Error checking file type: {e}") + err(f"Error checking file type: {e}") return False async def extract_text_from_pdf(file_path: str) -> str: if not await is_valid_pdf(file_path): - logger.error(f"Invalid PDF file: {file_path}") + err(f"Invalid PDF file: {file_path}") return "" text = '' @@ -309,23 +314,23 @@ async def extract_text_from_pdf(file_path: str) -> str: if text.strip(): # Successfully extracted text return clean_text(text) except Exception as e: - logger.error(f"Error extracting text with PyPDF2: {str(e)}") + err(f"Error extracting text with PyPDF2: {str(e)}") try: text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path) if text_pdfminer.strip(): # Successfully extracted text return clean_text(text_pdfminer) except Exception as e: - logger.error(f"Error extracting text with pdfminer.six: {str(e)}") + err(f"Error extracting text with pdfminer.six: {str(e)}") # Fall back to OCR - logger.debug("Falling back to OCR for text extraction...") + debug("Falling back to OCR for text extraction...") try: images = convert_from_path(file_path) ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images)) return ' '.join(ocr_texts).strip() except Exception as e: - logger.error(f"OCR failed: {str(e)}") + err(f"OCR failed: {str(e)}") return "" async def extract_text_from_docx(file_path: str) -> str: @@ -333,7 +338,7 @@ async def extract_text_from_docx(file_path: str) -> str: doc = Document(file_path) full_text = [paragraph.text for paragraph in doc.paragraphs] return '\n'.join(full_text) - + return await asyncio.to_thread(read_docx, file_path) # Correcting read_text_file to be asynchronous @@ -345,7 +350,7 @@ def _sync_read_text_file(file_path: str) -> str: # Actual synchronous file reading operation with open(file_path, 'r', encoding='utf-8') as file: return file.read() - + def should_use_ocr(text, num_pages) -> bool: if not text: return True # No text was extracted, use OCR @@ -377,7 +382,7 @@ def convert_degrees_to_cardinal(d): """ dirs = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"] ix = round(d / (360. / len(dirs))) - return dirs[ix % len(dirs)] + return dirs[ix % len(dirs)] @@ -409,7 +414,7 @@ def convert_to_12_hour_format(datetime_obj_or_str): datetime_obj_or_str = datetime_obj_or_str.strftime("%H:%M:%S") else: datetime_obj = datetime_obj_or_str - + if isinstance(datetime_obj_or_str, str): time24 = datetime_obj_or_str else: @@ -427,8 +432,8 @@ def encode_image_to_base64(image_path): byte_data = output_buffer.getvalue() base64_str = base64.b64encode(byte_data).decode('utf-8') return base64_str - else: - logger.debug(f"Error: File does not exist at {image_path}") + else: + debug(f"Error: File does not exist at {image_path}") def resize_and_convert_image(image_path, max_size=2160, quality=80): with Image.open(image_path) as img: @@ -458,7 +463,7 @@ def load_geonames_data(path: str): names=columns, low_memory=False ) - + return data async def run_ssh_command(server, command): @@ -472,5 +477,5 @@ async def run_ssh_command(server, command): ssh.close() return output, error except Exception as e: - logger.error(f"SSH command failed for server {server.id}: {str(e)}") - raise \ No newline at end of file + err(f"SSH command failed for server {server.id}: {str(e)}") + raise