sijapi/sijapi/__init__.py

197 lines
9.2 KiB
Python
Raw Normal View History

# __init__.py
2024-06-23 14:51:45 -07:00
import os
from pathlib import Path
import ipaddress
import multiprocessing
from dotenv import load_dotenv
from dateutil import tz
from pathlib import Path
from .classes import Geocoder, APIConfig, Configuration, Logger
2024-06-23 14:51:45 -07:00
2024-07-11 14:23:37 -07:00
# Initialization
2024-06-23 14:51:45 -07:00
# Every package-relative path is anchored to the directory holding this file.
BASE_DIR = Path(__file__).resolve().parents[0]
CONFIG_DIR = BASE_DIR.joinpath("config")
ENV_PATH = CONFIG_DIR.joinpath(".env")
2024-07-11 14:23:37 -07:00
# Read config/.env so the os.getenv() lookups further down can see the
# values defined there.
load_dotenv(ENV_PATH)
2024-06-23 14:51:45 -07:00
# Log output lives inside the package directory; create the folder on import.
LOGS_DIR = BASE_DIR.joinpath("logs")
LOGS_DIR.mkdir(parents=True, exist_ok=True)
2024-07-11 14:23:37 -07:00
# Package-wide logger instance, constructed with the name "Central" and the
# logs directory.
L = Logger("Central", LOGS_DIR)
2024-06-23 14:51:45 -07:00
2024-07-11 14:23:37 -07:00
# API essentials
# Configuration objects built by the loaders imported from .classes. The
# string arguments name the configs to load (presumably files under
# CONFIG_DIR -- TODO confirm against .classes).
API = APIConfig.load('api', 'secrets')
Dir = Configuration.load('dirs')
2024-07-22 12:19:31 -07:00
# "<bind>:<port>" string assembled from the loaded API config.
HOST = f"{API.BIND}:{API.PORT}"
2024-06-23 14:51:45 -07:00
# Addresses treated as local: every comma-separated entry of the LOCAL_HOSTS
# environment variable parsed as an IP address, plus the literal hostname
# 'localhost'. Blank entries (e.g. from a trailing comma) are skipped because
# ip_address('') raises ValueError.
LOCAL_HOSTS = [
    ipaddress.ip_address(localhost.strip())
    for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')
    if localhost.strip()
] + ['localhost']
SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
# Worker budget: the MAX_CPU_CORES environment variable if set, otherwise half
# of the available cores; never more than the machine has and never fewer
# than one (half of a single core would otherwise round down to 0).
MAX_CPU_CORES = max(
    1,
    min(
        int(os.getenv("MAX_CPU_CORES", multiprocessing.cpu_count() // 2)),
        multiprocessing.cpu_count(),
    ),
)
# Configuration loaded under the names 'img' and 'secrets'.
IMG = Configuration.load('img', 'secrets')
2024-07-22 12:19:31 -07:00
# Per-feature configuration objects; 'news' is loaded together with 'secrets',
# 'archivist' on its own.
News = Configuration.load('news', 'secrets')
Archivist = Configuration.load('archivist')
2024-07-22 12:19:31 -07:00
# 'scrape' is loaded with 'secrets' and is additionally handed the Dir config
# (presumably so directory placeholders can be resolved -- TODO confirm).
Scrape = Configuration.load('scrape', 'secrets', Dir)
Serve = Configuration.load('serve')
2024-07-11 14:23:37 -07:00
# Directories & general paths
2024-06-23 14:51:45 -07:00
# Package-relative locations. DATA_DIR, ALERTS_DIR and REQUESTS_DIR are
# created on import if they do not exist yet; the others are only named here.
ROUTER_DIR = BASE_DIR.joinpath("routers")
DATA_DIR = BASE_DIR.joinpath("data")
DATA_DIR.mkdir(parents=True, exist_ok=True)
ALERTS_DIR = DATA_DIR.joinpath("alerts")
ALERTS_DIR.mkdir(parents=True, exist_ok=True)
REQUESTS_DIR = LOGS_DIR.joinpath("requests")
REQUESTS_DIR.mkdir(parents=True, exist_ok=True)
REQUESTS_LOG_PATH = LOGS_DIR.joinpath("requests.log")
BLOCKLISTS_DIR = DATA_DIR.joinpath("blocklists")
2024-06-23 14:51:45 -07:00
2024-07-11 14:23:37 -07:00
# LOCATE AND WEATHER LOCALIZATIONS
2024-06-23 14:51:45 -07:00
# User identity strings come straight from the environment (None when unset).
USER_FULLNAME = os.environ.get('USER_FULLNAME')
USER_BIO = os.environ.get('USER_BIO')
NAMED_LOCATIONS = CONFIG_DIR.joinpath("named-locations.yaml")
2024-06-23 14:51:45 -07:00
# Weather API endpoint/key, geodata file locations, and the default timezone.
VISUALCROSSING_BASE_URL = os.environ.get(
    "VISUALCROSSING_BASE_URL",
    "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline",
)
VISUALCROSSING_API_KEY = os.environ.get("VISUALCROSSING_API_KEY")
GEONAMES_TXT = DATA_DIR.joinpath("geonames.txt")
LOCATIONS_CSV = DATA_DIR.joinpath("US.csv")
# Timezone object for the TZ environment variable, falling back to
# America/Los_Angeles.
TZ = tz.gettz(os.environ.get("TZ", "America/Los_Angeles"))
TZ_CACHE = DATA_DIR.joinpath("tzcache.json")
# Geocoder is handed the named-locations file and the timezone cache path.
GEO = Geocoder(NAMED_LOCATIONS, TZ_CACHE)
2024-06-23 14:51:45 -07:00
2024-07-11 14:23:37 -07:00
# Obsidian & notes
# NOTE(review): despite the name, this is a negated character class -- it
# matches every character that is NOT a word character, space, dot or hyphen.
ALLOWED_FILENAME_CHARS = r'[^\w \.-]'
MAX_PATH_LENGTH = 254
# Vault root: OBSIDIAN_BASE_DIR from the environment when set and non-empty,
# otherwise <Dir.HOME>/Nextcloud/notes.
OBSIDIAN_VAULT_DIR = Path(
    os.environ.get("OBSIDIAN_BASE_DIR")
    or Path(Dir.HOME).joinpath("Nextcloud", "notes")
)
2024-06-23 14:51:45 -07:00
# Locations inside the vault. The resources/banner entries are kept as
# vault-relative strings; the banner and archive folders are created on import.
OBSIDIAN_JOURNAL_DIR = OBSIDIAN_VAULT_DIR.joinpath("journal")
OBSIDIAN_RESOURCES_DIR = "obsidian/resources"
OBSIDIAN_BANNER_DIR = OBSIDIAN_RESOURCES_DIR + "/banners"
Path(OBSIDIAN_VAULT_DIR).joinpath(OBSIDIAN_BANNER_DIR).mkdir(parents=True, exist_ok=True)
OBSIDIAN_BANNER_SCENE = os.environ.get("OBSIDIAN_BANNER_SCENE", "wallpaper")
OBSIDIAN_CHROMADB_COLLECTION = os.environ.get("OBSIDIAN_CHROMADB_COLLECTION", "obsidian")
# ARCHIVE_DIR may be redirected through the environment; the default is
# <vault>/archive.
ARCHIVE_DIR = Path(os.environ.get("ARCHIVE_DIR", OBSIDIAN_VAULT_DIR.joinpath("archive")))
ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
2024-06-23 14:51:45 -07:00
# Document storage under the data directory, created on import.
DOC_DIR = DATA_DIR.joinpath("docs")
DOC_DIR.mkdir(parents=True, exist_ok=True)
2024-07-11 14:23:37 -07:00
# Datetime schema for the daily-note folder hierarchy
2024-06-23 14:51:45 -07:00
# Format strings taken from the environment; each is None when its variable
# is not set (no defaults are supplied here).
YEAR_FMT = os.environ.get("YEAR_FMT")
MONTH_FMT = os.environ.get("MONTH_FMT")
DAY_FMT = os.environ.get("DAY_FMT")
DAY_SHORT_FMT = os.environ.get("DAY_SHORT_FMT")
2024-07-11 14:23:37 -07:00
# Large language model
2024-06-23 14:51:45 -07:00
# LLM endpoint, system prompt and default model name.
LLM_URL = os.environ.get("LLM_URL", "http://localhost:11434")
# Note: the system prompt is read from the SYSTEM_MSG variable.
LLM_SYS_MSG = os.environ.get("SYSTEM_MSG", "You are a helpful AI assistant.")
DEFAULT_LLM = os.environ.get("DEFAULT_LLM", "llama3")
2024-06-23 14:51:45 -07:00
# Default vision model and default voice name.
DEFAULT_VISION = os.environ.get("DEFAULT_VISION", "llava")
DEFAULT_VOICE = os.environ.get("DEFAULT_VOICE", "Luna")
2024-06-25 03:12:07 -07:00
# Default voice name for the "11L" backend.
DEFAULT_11L_VOICE = os.environ.get("DEFAULT_11L_VOICE", "Victoria")
2024-06-23 14:51:45 -07:00
# None when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
2024-07-11 14:23:37 -07:00
# Summarization
# Numeric settings are parsed from the environment with the defaults below.
SUMMARY_CHUNK_SIZE = int(os.environ.get("SUMMARY_CHUNK_SIZE", 16384))  # tokens
SUMMARY_CHUNK_OVERLAP = int(os.environ.get("SUMMARY_CHUNK_OVERLAP", 256))  # tokens
SUMMARY_TPW = float(os.environ.get("SUMMARY_TPW", 1.3))  # presumably tokens per word -- TODO confirm
SUMMARY_LENGTH_RATIO = int(os.environ.get("SUMMARY_LENGTH_RATIO", 4))  # ratio of original length to summary length
SUMMARY_MIN_LENGTH = int(os.environ.get("SUMMARY_MIN_LENGTH", 150))  # tokens
SUMMARY_MODEL = os.environ.get("SUMMARY_MODEL", "dolphin-llama3:8b-256k")
SUMMARY_TOKEN_LIMIT = int(os.environ.get("SUMMARY_TOKEN_LIMIT", 16384))
# Default prompts; both can be replaced wholesale through the environment.
SUMMARY_INSTRUCT = os.environ.get('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
SUMMARY_INSTRUCT_TTS = os.environ.get('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
2024-07-11 14:23:37 -07:00
# Stable diffusion
# Image output and workflow folders under data/img, created on import.
IMG_DIR = DATA_DIR.joinpath("img", "images")
IMG_DIR.mkdir(parents=True, exist_ok=True)
IMG_WORKFLOWS_DIR = DATA_DIR.joinpath("img", "workflows")
IMG_WORKFLOWS_DIR.mkdir(parents=True, exist_ok=True)
2024-06-23 14:51:45 -07:00
# ComfyUI endpoint, install directory and launch command.
COMFYUI_URL = os.getenv('COMFYUI_URL', "http://localhost:8188")
# COMFYUI_DIR has no sensible default. Path(None) raises TypeError, which
# would abort the import of the whole package when the variable is not set,
# so both directory constants are None in that case instead.
COMFYUI_DIR = Path(os.environ['COMFYUI_DIR']) if 'COMFYUI_DIR' in os.environ else None
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output' if COMFYUI_DIR is not None else None
COMFYUI_LAUNCH_CMD = os.getenv('COMFYUI_LAUNCH_CMD', 'mamba activate comfyui && python main.py')
# Path of the image configuration file inside the config directory.
IMG_CONFIG_PATH = CONFIG_DIR.joinpath('img.yaml')
2024-06-23 14:51:45 -07:00
2024-07-11 14:23:37 -07:00
# ASR
2024-06-23 14:51:45 -07:00
# Speech-recognition working directory, created on import.
ASR_DIR = DATA_DIR.joinpath("asr")
ASR_DIR.mkdir(parents=True, exist_ok=True)
# NOTE(review): when WHISPER_CPP_DIR is unset, str(None) makes this resolve to
# "<Dir.HOME>/None" -- confirm whether a real default is intended.
WHISPER_CPP_DIR = Path(Dir.HOME).joinpath(str(os.environ.get("WHISPER_CPP_DIR")))
2024-06-23 14:51:45 -07:00
# Comma-separated model names; the placeholder default yields ['NULL', 'VOID'].
WHISPER_CPP_MODELS = os.environ.get('WHISPER_CPP_MODELS', 'NULL,VOID').split(',')
2024-07-11 14:23:37 -07:00
# TTS
2024-06-23 14:51:45 -07:00
# Text-to-speech settings and working directories.
# The default for PREFERRED_TTS is the string "None", not the None object.
PREFERRED_TTS = os.getenv("PREFERRED_TTS", "None")
TTS_DIR = DATA_DIR / "tts"
os.makedirs(TTS_DIR, exist_ok=True)
VOICE_DIR = TTS_DIR / 'voices'
os.makedirs(VOICE_DIR, exist_ok=True)
# Always a Path, whether it comes from the environment (a str) or from the
# default (already a Path), so callers get one consistent type -- the same
# treatment ARCHIVE_DIR receives.
PODCAST_DIR = Path(os.getenv("PODCAST_DIR", TTS_DIR / "sideloads"))
2024-06-23 14:51:45 -07:00
# Create the podcast, output and segment folders on import.
# PODCAST_DIR may be a plain string, so it is wrapped before mkdir().
Path(PODCAST_DIR).mkdir(parents=True, exist_ok=True)
TTS_OUTPUT_DIR = TTS_DIR.joinpath('outputs')
TTS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
TTS_SEGMENTS_DIR = TTS_DIR.joinpath('segments')
TTS_SEGMENTS_DIR.mkdir(parents=True, exist_ok=True)
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
2024-07-11 14:23:37 -07:00
# Calendar & email account
2024-06-23 14:51:45 -07:00
# Feature switches: enabled only when the variable is exactly the string "True".
MS365_TOGGLE = os.environ.get("MS365_TOGGLE") == "True"
ICAL_TOGGLE = os.environ.get("ICAL_TOGGLE") == "True"
ICS_PATH = DATA_DIR.joinpath('calendar.ics')  # deprecated now, but maybe revive?
# Comma-separated calendar names; the placeholder default yields ['NULL', 'VOID'].
ICALENDARS = os.environ.get('ICALENDARS', 'NULL,VOID').split(',')
2024-06-25 03:12:07 -07:00
# Email configuration file and its log folder (created on import).
EMAIL_CONFIG = CONFIG_DIR.joinpath("email.yaml")
EMAIL_LOGS = LOGS_DIR.joinpath("email")
EMAIL_LOGS.mkdir(parents=True, exist_ok=True)
2024-06-23 14:51:45 -07:00
2024-07-11 14:23:37 -07:00
# Courtlistener & other webhooks
2024-06-23 14:51:45 -07:00
# CourtListener storage under data/courtlistener; both folders are created on
# import.
COURTLISTENER_DOCKETS_DIR = DATA_DIR.joinpath("courtlistener", "dockets")
COURTLISTENER_DOCKETS_DIR.mkdir(parents=True, exist_ok=True)
COURTLISTENER_SEARCH_DIR = DATA_DIR.joinpath("courtlistener", "cases")
COURTLISTENER_SEARCH_DIR.mkdir(parents=True, exist_ok=True)
CASETABLE_PATH = DATA_DIR.joinpath("courtlistener", "cases.json")
COURTLISTENER_API_KEY = os.environ.get("COURTLISTENER_API_KEY")
COURTLISTENER_BASE_URL = os.environ.get("COURTLISTENER_BASE_URL", "https://www.courtlistener.com")
# NOTE(review): the dockets URL is hard-coded and does not follow
# COURTLISTENER_BASE_URL -- confirm that this is intended.
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
2024-07-11 14:23:37 -07:00
# Keys & passwords
2024-06-23 14:51:45 -07:00
# The public key file is looked up one level above the package directory.
PUBLIC_KEY_FILE = os.environ.get("PUBLIC_KEY_FILE", 'you_public_key.asc')
# NOTE(review): read unconditionally at import time, so a missing key file
# raises here -- confirm that failing fast is the desired behaviour.
PUBLIC_KEY = BASE_DIR.parent.joinpath(PUBLIC_KEY_FILE).read_text()
# Credentials and endpoints below are None when their variable is unset,
# except TIMING_API_URL, which has a default.
MAC_ID = os.environ.get("MAC_ID")
MAC_UN = os.environ.get("MAC_UN")
MAC_PW = os.environ.get("MAC_PW")
TIMING_API_KEY = os.environ.get("TIMING_API_KEY")
TIMING_API_URL = os.environ.get("TIMING_API_URL", "https://web.timingapp.com/api/v1")
PHOTOPRISM_URL = os.environ.get("PHOTOPRISM_URL")
PHOTOPRISM_USER = os.environ.get("PHOTOPRISM_USER")
PHOTOPRISM_PASS = os.environ.get("PHOTOPRISM_PASS")
2024-07-11 14:23:37 -07:00
# Tailscale
2024-06-23 14:51:45 -07:00
# Tailscale address of this node, or None when TS_IP is unset, empty, or left
# at the "NULL" placeholder. The previous default of "NULL" was passed straight
# to ip_address(), which raises ValueError and aborted the import whenever the
# variable was missing.
TS_IP = (
    ipaddress.ip_address(os.getenv("TS_IP"))
    if os.getenv("TS_IP") not in (None, "", "NULL")
    else None
)
# Tailnet subnet, or None when TS_SUBNET is unset or empty.
TS_SUBNET = ipaddress.ip_network(os.getenv("TS_SUBNET")) if os.getenv("TS_SUBNET") else None
TS_ID = os.getenv("TS_ID", "NULL")
TS_TAILNET = os.getenv("TS_TAILNET", "NULL")
# URL built from the node id and tailnet name; with the placeholder defaults
# this is "http://NULL.NULL.ts.net".
TS_ADDRESS = f"http://{TS_ID}.{TS_TAILNET}.ts.net"
2024-07-11 14:23:37 -07:00
# Cloudflare
2024-06-23 14:51:45 -07:00
# Cloudflare API settings (None when unset) and local state files.
CF_API_BASE_URL = os.environ.get("CF_API_BASE_URL")
CF_TOKEN = os.environ.get("CF_TOKEN")
CF_IP = DATA_DIR.joinpath("cf_ip.txt")  # to be deprecated soon
CF_DOMAINS_PATH = DATA_DIR.joinpath("cf_domains.json")  # to be deprecated soon
2024-07-11 14:23:37 -07:00
# Caddy - not fully implemented
# Allow the "API.URL" environment variable to override the URL held by the
# loaded API config. When the variable is absent, keep whatever the config
# already provides (or None if it has no URL) instead of overwriting it with
# None.
API.URL = os.getenv("API.URL", getattr(API, "URL", None))
2024-06-23 14:51:45 -07:00
# Caddy settings. CADDYFILE_PATH is only meaningful when a server is
# configured: it is None without CADDY_SERVER, otherwise the environment
# value (empty string when unset).
CADDY_SERVER = os.environ.get('CADDY_SERVER', None)
if CADDY_SERVER is None:
    CADDYFILE_PATH = None
else:
    CADDYFILE_PATH = os.environ.get("CADDYFILE_PATH", "")
CADDY_API_KEY = os.environ.get("CADDY_API_KEY")
2024-07-11 14:23:37 -07:00
# Microsoft Graph
2024-06-23 14:51:45 -07:00
# Microsoft 365 application credentials (None when unset).
MS365_CLIENT_ID = os.environ.get('MS365_CLIENT_ID')
MS365_SECRET = os.environ.get('MS365_SECRET')
2024-07-22 12:19:31 -07:00
# Tenant id (None when unset) and the certificate/key file locations.
MS365_TENANT_ID = os.environ.get('MS365_TENANT_ID')
MS365_CERT_PATH = DATA_DIR.joinpath('ms365', '.cert.pem')  # deprecated
MS365_KEY_PATH = DATA_DIR.joinpath('ms365', '.cert.key')  # deprecated
2024-06-23 14:51:45 -07:00
# The certificate key path is marked deprecated, so a missing key file must
# not abort the import of the whole package: MS365_KEY is the file's text when
# it exists and None otherwise.
MS365_KEY = MS365_KEY_PATH.read_text() if MS365_KEY_PATH.is_file() else None
MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt'
MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT')
MS365_LOGIN_URL = os.getenv("MS365_LOGIN_URL", "https://login.microsoftonline.com")
# Authority URL is "<login url>/<tenant id>".
MS365_AUTHORITY_URL = f"{MS365_LOGIN_URL}/{MS365_TENANT_ID}"
MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect")
# Comma-separated scope names, split into a list.
MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access').split(',')
2024-07-11 14:23:37 -07:00
# Maintenance
2024-06-23 14:51:45 -07:00
GARBAGE_COLLECTION_INTERVAL = 3600  # seconds (60 * 60): run cleanup every hour
2024-07-22 12:19:31 -07:00
GARBAGE_TTL = 24 * 3600  # seconds: delete files older than 24 hours