Initial commit

sanj 2024-06-23 00:19:36 -07:00
commit 7ea0783076
62 changed files with 13964 additions and 0 deletions

82
.gitignore vendored Normal file

@@ -0,0 +1,82 @@
# Ignore specific data files and directories
sijapi/data/calendar.ics
sijapi/data/asr/
sijapi/data/geocoder/
sijapi/data/tts/
sijapi/data/db/
sijapi/data/*.pbf
sijapi/data/geonames.txt
sijapi/data/sd/images/
sijapi/config/O365/
# Ignore all .env files
**/.env
# Ignore all log files
**/*.log
**/logs/
# Ignore Python cache files
**/__pycache__
# Ignore macOS system files
**/.DS_Store
# Ignore all .ics files
**/*.ics
# Ignore sync conflict files
**/*sync-conflict*.*
# Ignore database files
**/*.db
# Ignore large binary files
**/*.mp3
**/*.mp4
**/*.wav
# Ignore compiled Python files
**/*.pyc
# Ignore Jupyter Notebook checkpoints
**/.ipynb_checkpoints/
# Ignore virtual environment directories
venv/
env/
.venv/
# Ignore IDE-specific files
.vscode/
.idea/
# Ignore temporary files
*~
*.swp
*.swo
# Ignore compiled files
*.com
*.class
*.dll
*.exe
*.o
*.so
# Ignore packages
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
# Ignore OS generated files
ehthumbs.db
Thumbs.db
# Ignore .gitignore itself (optional)
#.gitignore

481
README.md Normal file

@@ -0,0 +1,481 @@
```
#──────────────────────────────────────────────────────────────────────────────────
# C O N F I G U R A T I O N F I L E
#──────────────────────────────────────────────────────────────────────────────────
#
# Hi friend! You've found my hidden .config.YAML-example file. Do you like
# old-school ASCII art? I bet you do. So listen, this'll be your method for
# configuring sijapi, and nothing works until you at least:
#
# (1) fill in the ESSENTIALS category, and
#
# (2) rename this file `.config.yaml`
#
# ... and even then, certain features will not work until you set other
# relevant variables below.
#
# So get yourself a beverage, put on some sick beats, and settle in for a vibe-y
# configuration sesh. Remember to read my detailed notes if you ever feel lost,
# and most important, remember:
#
# you are NOT alone,
# I love you SO much,
# and you are SO worthy. <3
#
# y o u r b f & b f 4 e , †
# .x+=:. . .
# z` ^% @88> .. @88>
# . <k %8P 888> .d`` %8P
# .@8Ned8" . "8P u @8Ne. .u .
# .@^%8888" .@88u . us888u. %8888:u@88N .@88u
# x88: `)8b. ''888E` u888u. .@88 "8888" `888I 888. ''888E`
# ~ 8888N=*8888 888E `'888E 9888 9888 888I 888I 888E
# %8" R88 888E 888E 9888 9888 888I 888I 888E
# @8Wou 9% 888E 888E 9888 9888 uW888L 888' 888E
# .888888P` 888& 888E 9888 9888 '*88888Nu88P 888&
# ` ^"F R888" 888E "888*""888" ~ '88888F` R888"
# "" 888E ^Y" ^Y' 888 ^ ""
# 888E *8E
# 888P '8> † biggest fan
# .J88" " " and best
# friend 4 e v e r
#
# B U T I H E A R Y O U :
# L E T ' S T A K E I T S L O W A N D
# ────────────── S T A R T W I T H T H E ──────────────
#
# ███████╗███████╗███████╗███████╗███╗ ██╗████████╗██╗ █████╗ ██╗ ███████╗
# ██╔════╝██╔════╝██╔════╝██╔════╝████╗ ██║╚══██╔══╝██║██╔══██╗██║ ██╔════╝
# █████╗ ███████╗███████╗█████╗ ██╔██╗ ██║ ██║ ██║███████║██║ ███████╗
# ██╔══╝ ╚════██║╚════██║██╔══╝ ██║╚██╗██║ ██║ ██║██╔══██║██║ ╚════██║
# ███████╗███████║███████║███████╗██║ ╚████║ ██║ ██║██║ ██║███████╗███████║
# ╚══════╝╚══════╝╚══════╝╚══════╝╚═╝ ╚═══╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝
# ─────────────────────────────────────────────────────────────────
#
#─── first, bind an ip address and port : ──────────────────────────────────────────
HOST_NET=0.0.0.0
HOST_PORT=4444
BASE_URL=https://api.sij.ai
#─── notes: ──────────────────────────────────────────────────────────────────────
#
# HOST_NET† and HOST_PORT together comprise HOST, and determine the IP address and port the server binds to.
# BASE_URL is used to assemble URLs, e.g. in the MS authentication flow and for serving images generated on the sd router.
# BASE_URL should match the base URL used to access sijapi sans endpoint, e.g. http://localhost:4444 or https://api.sij.ai
#
# † Take care here! Please ensure you understand the implications of setting HOST_NET to anything besides 127.0.0.1, and configure your firewall and router appropriately if you do. Setting HOST_NET to 0.0.0.0, for instance, opens sijapi to any device the server running it is accessible to — including potentially frightening internet randos (depending how your firewall, router, and NAT are configured).
#
# Here are a few options to consider to more securely enable access from
# other devices:
#
# (1) if all access can occur over Tailscale, either:
# (a) leave HOST_NET set to 127.0.0.1, run `tailscale cert $(tailscale
# whois $(tailscale ip | head -n 1) | awk '/Name:/ {print $2}')`
# if you haven't already issued yourself a TLS certificate on
# Tailscale, and then run `tailscale serve --bg --https=4443
# 4444` to expose sijapi to your other tailscale-enabled devices
# at `https://{device.magicdns-domain.net}:4443`; or
# (b) set HOST_NET to your server's Tailscale IP (this should work,
# but for me it doesn't reliably)
#
# (2) if WAN access truly is required, leave HOST_NET set to 127.0.0.1 and
# configure either:
# (a) a Cloudflare tunnel, or
# (b) a reverse proxy with HTTPS (Caddy is excellent for this).
#
# And please be sure to set a strong API key either way, but especially for (2).
# ──────────
#
#──── configure API key authorization and select exemptions ───────────────────────
GLOBAL_API_KEY=sk-NhrtQwCHNdK5sRZC
PUBLIC_SERVICES=/id,/ip,/health,/img/,/cl/dockets,/cl/search,/cd/alert
TRUSTED_SUBNETS=127.0.0.1/32,10.13.37.0/24,100.64.64.0/24
#─── notes: ──────────────────────────────────────────────────────────────────────
#
# GLOBAL_API_KEY determines the API key required to access all endpoints, except for PUBLIC_SERVICES and requests from TRUSTED_SUBNETS. Authentication is performed via an `Authorization: Bearer {GLOBAL_API_KEY}` header.
# TRUSTED_SUBNETS might commonly include 127.0.0.1/32 (localhost), 100.x.x.0/24 (Tailscale tailnet), and/or 192.168.x.0/24 or 10.x.x.0/24 (local network).
# When configuring a reverse proxy or Cloudflare tunnel, please verify that traffic passing through it does not appear to sijapi (i.e. in ./logs) as though it were coming from any of the subnets specified here. For sij, using Caddy, it does not, but your setup may differ.
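#
# For example (hypothetical requests, assuming the server is reachable at
# localhost:4444 and /note/example is a protected endpoint), the key can be
# passed in a header:
#
#   curl -H "Authorization: Bearer $GLOBAL_API_KEY" http://localhost:4444/note/example
#
# or as a query parameter, which the middleware also accepts:
#
#   curl "http://localhost:4444/note/example?api_key=$GLOBAL_API_KEY"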
# ──────────
#
#─── router selection: ────────────────────────────────────────────────────────────
ROUTERS=asr,llm,health,hooks,locate,note,sd,serve,summarize,time,tts,weather
UNLOADED=auth,calendar,cf,email,ig
#─── notes: ──────────────────────────────────────────────────────────────────────
#
# ROUTERS determines which routers are loaded.†
# UNLOADED is not used directly -- it's just there to help keep track of which routers are disabled.
#
# † ┓ ┏ orth bearing in mind: some routers inherently rely on other routers,
# ┃┃┃ 3rd party APIs, or other apps being installed locally. If a router
# ┗┻┛ that is set to load (i.e. is included in ROUTERS) depends on another
# router, that other router will load too, irrespective of whether it's listed.
#
# But let's get down to brass tacks, shall we?
#
# asr: requires faster_whisper — $ pip install faster_whisper — and
# downloading the model file specified in ASR_DEFAULT_MODEL.
#
# auth: authenticates a Microsoft 365 account (for email & calendar).
#
# calendar: requires (1) a Microsoft 365 account with a properly configured
# Azure Active Directory app, and/or (2) Calendars on macOS.
#
# cf: interfaces with the Cloudflare API and Caddy to register new
# [sub-]domains on Cloudflare and deploy them with Caddy as
# reverse proxy.
#
# llm: requires ollama — $ pip install ollama — and downloading the
# models set in LLM_DEFAULT_MODEL and LLM_VISION_MODEL.
#
# email: designed for accessing Protonmail via Protonmail Bridge and/or
# Microsoft 365, but should work with any IMAP/SMTP email account.
#
# hooks: designed for two specific use cases: monitoring court dockets
# through CourtListener.org, and monitoring arbitrary web pages for
# changes in tandem with a self-hosted changedetection.io instance.
# Both require accounts; other functionality would require
# additional / modified code.
#
# ig: requires an Instagram account, with credentials and other settings
# configured separately in the ig_config.json file; relies heavily
# on the llm and sd routers which have their own dependencies.
#
# locate: some endpoints work as is, but the core location tracking
# functionality requires Postgresql + the PostGIS extension and
# is designed specifically to pair with a mobile device where
# Pythonista is installed and configured to run the
# `gps_tracker.py` and `gps_upload.py` scripts periodically or per
# repeating condition (e.g. via automation under Apple Shortcuts).
#
# note: designed for use with Obsidian plus the Daily Notes and Tasks
# core extensions; and the Admonitions, Banners, Icons (with the
# Lucide pack), and Make.md community extensions. Moreover `notes`
# relies heavily on the calendar, llm, locate, sd, summarize, time,
# tts, and weather routers and accordingly on the external
# dependencies of each.
#
# sd: requires ComfyUI plus any modules and StableDiffusion models
# set in sd_config and individual workflow .json files.
#
# summarize: relies on the llm router and thus requires ollama.
#
# time: requires the subscription-based macOS app 'Timing' (one of many
# apps that together make SetApp an incredible value for macOS users!)
#
# tts: designed for use with coqui — $ pip install coqui — and/or the
# ElevenLabs API.
#
# weather: requires a VisualCrossing API key and is designed for (but doesn't
# itself strictly require) Postgresql with the PostGIS extension;
# (... but it presently relies on the locate router, which does).
#
#
# ... Whew! that was a lot, right? I'm so glad we're in this together...
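#
# Tip: to try a single router in isolation, you can launch sijapi with the
# --test flag defined in __main__.py, which loads only the named router:
#
#   python -m sijapi --test tts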
# ──────────
#
#─────────────────────── Y O U ' R E G O N N A L O V E ────────────────────────
#
# ░ ░░ ░░ ░ ░░░░░░░░ ░░░ ░░░ ░░ ░░░░░░░ ░
# ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒
# ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓▓▓▓▓ ▓▓ ▓▓▓▓▓▓▓ ▓▓▓▓ ▓ ▓▓▓▓▓▓▓ ▓▓▓
# ████ ████ ████ ████ █████████████ █ ████ █ █ ███████ ███████
# ████ ████ ████ █ █ ██ ███ ██ ████ █ █ █
#
# A N D I ' M N O T. E V E N. J E A L O U S.
# Y O U D E S E R V E I T A L L , B A B Y C A K E S.
#
#─── use tailscale for secure remote access: ───────────────────────────────────────
TS_IP=100.64.64.20
TS_SUBNET=100.64.64.0/24
TS_ID=sij-mbp16
TS_TAILNET=starling-sailfin
TAILSCALE_API_KEY=tskey-api-kosR4MfJtF11CNTRL-zJu4odnpr4huLwAGsuy54hvkJi2ScVWQL
#─── notes: ──────────────────────────────────────────────────────────────────────
#
# TS_IP should match the Tailscale IP of the device. But this is deprecated, and if the functionality becomes relevant again, it should come back in the form of a dynamic check (`tailscale status` in a shell subprocess) in __init__.py or even the /id endpoint.
# TS_SUBNET should match the tailnet subnet in IP/CIDR format
# TS_ID currently has two roles: it's used to assemble the complete MagicDNS of the server, and it determines what the /id endpoint on the health router returns. This is relevant where multiple servers run the script behind a load balancer (e.g. Caddy), as a means to check which server responds. Bear in mind that /id is NOT API key-protected by default here.
# TS_TAILNET should match the tailnet's MagicDNS domain (omitting the `.ts.net`, for reasons)
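#
# For instance, a quick way to check which server answered (hypothetical
# output format, assuming multiple servers behind a load balancer):
#
#   $ curl https://api.sij.ai/id
#   sij-mbp16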
# ──────────
#
#────────────── U & M E ── W E C A N G E T T H R O U G H ──────────────────
#
# ██▓███ ▒█████ ██████ ▄▄▄█████▓ ▄████ ██▀███ ▓█████ ██████
# ▓██░ ██▒██▒ ██▒▒██ ▒ ▓ ██▒ ▓▒ ██▒ ▀█▒▓██ ▒ ██▒▓█ ▀ ▒██ ▒
# ▓██░ ██▓▒██░ ██▒░ ▓██▄ ▒ ▓██░ ▒░▒██░▄▄▄░▓██ ░▄█ ▒▒███ ░ ▓██▄
# ▒██▄█▓▒ ▒██ ██░ ▒ ██▒░ ▓██▓ ░ ░▓█ ██▓▒██▀▀█▄ ▒▓█ ▄ ▒ ██▒
# ▒██▒ ░ ░ ████▓▒░▒██████▒▒ ▒██▒ ░ ░▒▓███▀▒░██▓ ▒██▒░▒████▒▒██████▒▒
# ▒██▒ ░ ░ ▒░▒░▒░ ▒ ▒▓▒ ▒ ░ ▒ ░░ ░▒ ▒ ░ ▒▓ ░▒▓░░░ ▒░ ░▒ ▒▓▒ ▒ ░
# ▒▓▒░ ░ ▒ ▒░ ░ ░▒ ░ ░ ░ ░ ░ ░▒ ░ ▒░ ░ ░ ░░ ░▒ ░ ░
# ░▒ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░░ ░ ░ ░ ░ ░
# ░░ ░ ░ T O G E T H ░ R . ░ ░ ░ ░ ░
# ░
#─── for weather and locate modules: ───────────── J U S T H O L D M Y H A N D .
DB=sij
# R E A L T I G H T.
DB_HOST=127.0.0.1
DB_PORT=5432
# U G O T T H I S , K ?
DB_USER=sij
DB_PASS='Synchr0!'
# Y E A H . . .
DB_SSH=100.64.64.15
# * J U S T L I K E T H A T . *
DB_SSH_USER=sij
DB_SSH_PASS='Synchr0!'
#─── notes: ────────────────────────────────────────────────── S E E ? E Z - P Z
#
# DB, DB_HOST, DB_PORT, DB_USER, and DB_PASS should specify those respective
# credentials for your Postgres database. DB_SSH and associated _USER and _PASS
# variables allow database access over an SSH tunnel.
#
# In the current implementation, we rely on Postgres to hold:
# i. user-logged location data (locate module), and
# ii. results from past weather forecast checks (weather module).
#
# A future version will hopefully make use of PostGIS's geocoding capabilities,
# and add a vector database for the LLM module. Until then it's up to you if the
# locate and weather modules are worth the hassle of maintaining Postgres.
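#
# For reference, __init__.py assembles these into a standard Postgres
# connection URL of the form:
#
#   postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB}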
# ──────────
#
#
#───── Y O U C A N S I T T H I S O N E) O U T B A B E , ────────<3
# ( ( ( I F Y O U ' D )
# ))\( ( /(( L I K E . . . ( (
# ( (()/(( /((_)\ )\())),----,.
# )\((__ ))\( ()) |__))((_)- ))((,' ,'
# ,' , `. /((_)\(_) / / '. |(_)|_ ,' .'
# ,-+-,.' _ | / / '. / ../ ; ,---. ,----.' .'
# ,-+-. ; , || | : /`. / \ ``\ .`- ' / \ | | .'
# ,--.'|' | ;|; | |--` \___\/ \ : / / ' : : |--,
# | | ,', | ':| : ;_ \ : | . ' / : | ;.' \
# | | / | | || \ \ `. / / / ' / ; | | |
# ' | : | : |, `----. \ ) \ \ \ | : \ `----'.'\ ;
# ; . | ; |--' )(__ \ \ | ((__ / : |; | ``. __ \ . |
# | : | | , / /`--' / /)\(/\ / :' ; \ / /\/ / :
# | : ' |/ '--'. / / ,,/ ',- .' | .\ |/ ,,/ ',- .
# ; | |`-' `--'---' \ ''\ ; | : '; :\ ''\ ;
# | ;/ O R , Y U P , \ \ .' \ \ / \ \ .'
# '---'B U R N I T A L L D O W N.-`-,,-' `---`--` `--`-,-'
# Y O U H A V E A
# G O D D E S S O F D E S T R U C T I O N W I T H I N ,
# A N D T H A T I S S O V A L I D !!
#─── ms365 (calendars): ──────────────────────────────────────────────────────────────
MS365_TOGGLE=False
ICAL_TOGGLE=True
ICALENDARS=3CCC9C7B-BFF0-4850-9CE9-BC504859CBC6,E68FE085-2ECA-4097-AF0A-8D38C404D8DA,AB5A0473-16DD-4916-BD6D-6CB916726605
MS365_CLIENT_ID=ce8cbd24-f146-4dc7-8ee7-51d9b69dec59
MS365_TENANT_ID=bad78048-a6e0-47b1-a24b-403c444aa349
MS365_SECRET=gbw8Q~7U90GMdvneNnPnzAUt5nWVJPbOsagLPbMe
MS365_THUMBPRINT=4CD86699A8B675411EE9C971CB2783E11F9E52CB
MS365_SCOPE=basic,calendar_all
MS365_TOKEN_FILE=oauth_token.txt
#─── notes: ──────────────────────────────────────────────────────────────────────────────
#
# MS365_CLIENT_ID, _TENANT_ID, _SECRET, and _SCOPE must be obtained from Microsoft
# via the Azure portal, by creating a new app registration and an accompanying secret.
# MS365_THUMBPRINT is a vestige of an earlier failed attempt to get this working, and
# for now is deprecated. I recommend seeking out a well-reviewed tutorial for
# creating an app on Azure with a client_id and secret and necessary scopes for
# individual calendar access, because I had one heck of a time trying various approaches.
# Do better, Microsoft.
# ──────────
#
#──────────────────────────────── I B E T Y O U ──────────────────────────────────
# R E C E I V E A L O T O F L O V E L E T T E R S O V E R
#
# .----------------. .----------------. .----------------. .----------------.
# | .--------------. | .--------------. | .--------------. | .--------------. |
# | | _____ | | | ____ ____ | | | __ | | | ______ | |
# | | |_ _| | | ||_ \ / _|| | | / \ | | | |_ __ \ | |
# | | | | | | | | \/ | | | | / /\ \ | | | | |__) | | |
# | | | | | | | | |\ /| | | | | / ____ \ | | | | ___/ | |
# | | _| |_ | | | _| |_\/_| |_ | | | _/ / \ \_ | | | _| |_ | |
# | | |_____| | | ||_____||_____|| | ||____| |____|| | | |_____| | |
# | | | | | | | | | | | | |
# | '--------------' | '--------------' | '--------------' | '--------------' |
# '----------------' '----------------' '----------------' '----------------'
#
# E M A I L
#
#─── imap & smtp: ────────────────────────────────────────────────────────────────────────
IMAP_HOST=127.0.0.1
EMAIL_ADDRESS='sij@sij.law'
EMAIL_PASSWORD='hesSw7Kum16z-_yxI4kfXQ'
IMAP_PORT=1143
IMAP_ENCRYPTION=STARTTLS
SMTP_PORT=1025
SMTP_ENCRYPTION=SSL
#─── notes: ───────────────────────────────────────────────────────────────────────────────
#
# This is primarily for summarizing incoming emails. Any IMAP account should work, but
# I focused testing on a somewhat complex setup involving Protonmail Bridge.
# ──────────
#
#──────────────────────────────── G E T S I L L Y ────────────────────────────────────
# T H E N G O B O N K E R S
# W I T H Y O U R O W N
#
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓██████▒▓██████▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓████████▓▒ ░▒▓████████▓▒ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
#
# ( F O R R E A L T H O U G H — T H E S E
#─── via ollama (local llm): A R E S O H O T R I G H T N O W )
LLM_URL=http://localhost:11434
SYSTEM_MSG=You are a helpful AI assistant.
DEFAULT_LLM=dolphin-mistral
DEFAULT_VISION=llava-llama3
SUMMARY_MODEL=dolphin-mistral
SUMMARY_CHUNK_SIZE=4000
SUMMARY_CHUNK_OVERLAP=100
SUMMARY_TPW=1.3
SUMMARY_LENGTH_RATIO=4
SUMMARY_MIN_LENGTH=150
SUMMARY_TOKEN_LIMIT=4096
SUMMARY_INSTRUCT='You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the summary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.'
SUMMARY_INSTRUCT_TTS="You are an AI assistant that provides email summaries for Sanjay -- nothing more and nothing less. You must not include ANY extraneous text other than the summary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary. Your response will undergo Text-To-Speech conversion and be added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following."
DEFAULT_VOICE=Luna
#─── notes: ──────────────────────────────────────────────────────────────────────────────
#
# The exact values here will depend on what software you are using for LLM inference,
# and of course what models and capabilities are available through it. The script was
# designed for use with `ollama`, but most of the functionality should work equally
# well with LM Studio, LocalAI, etc.
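#
# For example, with ollama the two default models above can be pulled ahead
# of time (assuming a standard ollama install):
#
#   ollama pull dolphin-mistral
#   ollama pull llava-llama3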
#
# DEFAULT_LLM is self-explanatory; DEFAULT_VISION is used for image recognition within
# a multimodal chat context, such as on the ig module for generating intelligible
# comments on Instagram posts, or more realistic captions for sd-generated images.
#
# Note it's possible to specify a separate model for general purposes and for
# summarization tasks. The other SUMMARY_ variables call for some explanation,
# in particular six that are most relevant when summarizing very long documents:
#
# SUMMARY_CHUNK_SIZE: determines the maximum length, in tokens, of the pieces that are
# split off and sent individually to the model.
#
# SUMMARY_CHUNK_OVERLAP: determines how much of each chunk overlaps with the prior
# and next chunks. Set too high, it causes repetition; set too
# low, it causes confusion and poor summary results.
# The summarization algorithm is flawed but I've gotten the best
# results with this set around 100-200.
#
# SUMMARY_TPW: used in estimating the token count of a prompt for purposes of
# complying with the maximum tokens a model can handle at once.
# Best you can do is estimate. I tend to use long words fairly
# excessively and found my average was 1.3 tokens per word. YMMV.
#
# SUMMARY_LENGTH_RATIO: this is the primary control over the length of generated
# summaries, expressed as the ratio of original text length to
# summary length. The default, 4, means the summaries will be
# around 1/4 the length of the original text you provide it.
#
# SUMMARY_MIN_LENGTH: the default SUMMARY_LENGTH_RATIO of 4 isn't ideal for very
# short texts, but setting it any lower sacrifices conciseness
# in summaries of longer texts. In short one size doesn't fit
# all. The compromise I landed on was to set a "maximum minimum"
# summary length: under no circumstances will the script impose
# a smaller maximum length than this value.
#
# SUMMARY_INSTRUCT: sets the prompt used when summarizing text.
#
# SUMMARY_INSTRUCT_TTS: sets a separate prompt for use when summarizing text where
# tts output was requested; tends to yield "cleaner" audio
# with less numbers (page numbers, citations) and other
# information extraneous to spoken contexts.
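#
# A quick worked example using the defaults above (numbers are hypothetical):
# a 10,000-word document is estimated at 10,000 × 1.3 = 13,000 tokens (per
# SUMMARY_TPW); that exceeds SUMMARY_TOKEN_LIMIT (4096), so the text is split
# into chunks of up to SUMMARY_CHUNK_SIZE (4000) tokens, each sharing
# SUMMARY_CHUNK_OVERLAP (100) tokens with its neighbors; the target summary
# length is 13,000 / 4 = 3,250 tokens (per SUMMARY_LENGTH_RATIO), comfortably
# above SUMMARY_MIN_LENGTH (150), so the minimum never applies.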
#
# DEFAULT_VOICE: used for all tts tasks when a specific voice is not requested.
# ──────────
#
#────,-_/────────── W E C A N E X P E R I M E N T W I T H ──────────.───────────
# ' | ,~-,-. ,-. ,-. ,--. | --' ,--. ,-. ,--. ,-. ,-. |-- . ,-. ,-.
# .^ | | | | ,--| | | | --' | -,- | --' | | | --' | ,--| | | | | | |
# `--' ' ' ' `-^ `-| `--' `---| `--' ' ' `--' ' `--^ `' ` `-' ' '
# , | ,-. | ~ I N T H E N U D E . ~
# `~~' `-+'
# O R F U L L Y C L O T H E D ── U P T O Y O U
#
#─── via comfyui (stable diffusion): ───── ( B U T L E T M E K N O W , Y E A H ? )
COMFYUI_URL=http://localhost:8188
COMFYUI_DIR=/Users/sij/workshop/sd/ComfyUI
PHOTOPRISM_USER=NOT_IMPLEMENTED
PHOTOPRISM_PASS=NOT_IMPLEMENTED
#─── notes: ──────────────────────────────────────────────────────────────────────────────
#
# COMFYUI_URL, as you may expect, should point to the URL you use to access ComfyUI. If you
# don't know, watch for it in the server logs once ComfyUI is fully launched.
#
# COMFYUI_DIR, with similar self-evidence, should point to the base directory of your
# ComfyUI installation (i.e. the folder that contains `models`, `inputs`, and `outputs`)
#
# PhotoPrism integration is not yet implemented, so don't bother with that just yet.
# ──────────
#
# D O N ' T M I S S O N E ───────────────────────────────────────
#\ F I N A L S M A T T E R I N G O F M I S C E L L A N Y \
# \ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ _/\\\\_ _ _ _ _ _ /\\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ \/\\\\\\_ _ _ _ /\\\\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ \/\\\//\\\_ _ /\\\//\\\ _ _/\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ \/\\\\///\\\/\\\/ \/\\\ _ _///_ _ _/\\\\\\\\\\_ _ _ _/\\\\\\\\_ _\
# \ _ _ \/\\\ _\///\\\/ _ \/\\\ _ _/\\\ _ \/\\\////// _ _ _/\\\////// _ _\
# \ _ _ \/\\\ _ _\/// _ _ \/\\\ _ _/\\\ _ \/\\\\\\\\\\_ _ /\\\_ _ _ _ _ _\
# \ _ _ \/\\\ _ _ _ _ _ _ \/\\\ _ _/\\\ _ \////////\\\_ _\//\\\ _ _ _ _ _\
# \ _ _ \/\\\ _ _ _ _ _ _ \/\\\ _ _/\\\ _ _/\\\\\\\\\\_ _ \///\\\\\\\\_ _\
# \ _ _ \///_ _ _ _ _ _ _ \///_ _ _///_ _ \////////// _ _ _ \//////// _ _\
# \ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# ─────────────────── A N D O T H E R W H A T - H A V E - Y O U S ──
#
#─── other needful API keys, mainly: ────────────────────────────────────────────────────
CF_TOKEN=ESjjVFHXfe6NrBo5TrN4_AfhHNezytCVlY-VS2HD
VISUALCROSSING_API_KEY=DAEJSKWJQ2CHM3J6B7C5FWQZV
ELEVENLABS_API_KEY=01eeafb6ce0f6d1fd70e4aa9e7262827
COURTLISTENER_BASE_URL=https://www.courtlistener.com
COURTLISTENER_API_KEY=your_courtlistener_api_key_here
TIMING_API_URL=https://web.timingapp.com/api/v1
TIMING_API_KEY=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiIxIiwianRpIjoiODMyNTMwYTAxYjJhNzdlOTgzZWRlMjRiNDdkMmY0YWYzYWU3YTIzYjkxM2QyNTFmZjE1YTQ4YTkzYjI3YTQ4M2I0MzE5MzU2MzZlMjYyZWYiLCJpYXQiOjE2OTI0MjEyNjkuMDc4MzU4LCJuYmYiOjE2OTI0MjEyNjkuMDc4MzYsImV4cCI6MjAwODA0MDQ2OS4wNzMzMjcsInN1YiI6IjM2MTgxNzA4NDcyMTEwMzg5NzYiLCJzY29wZXMiOltdfQ.fVhhJtYb6wtHBQj7J9sxTsT3nb6_BLu4ynqNMC-SpJ2exj31wF7dHXfdGF-ZCg_H2TWh8Jsrak7ovwHsMPvkLRPgxkyjkyLgVbnzZICbP0xffrsguTnillXKCbEkwOVo4s7esf829VVagHCkpNwYbfKLJ9FLHIqs0hQMhh_S7jpbzmXUe7t6tUG43IgILBD0IwphJ2BGs5X2fhjW8FkCke85JxbQ4a29cqYtgFJ7tMP97noTFB4e_gxFpHUl-Ou_bwdpBKfarTyxhtwm1DJkQB_MrAX4py8tmFlFFJPd-7WG-LaswiI7bctN3Lux-If5oxAhm29PkS3ooxvJD86YDR0rJ94aGc8IBypnqYyGFW1ks5fsQ057UwC3XK62ezilWdamh7gtcInShSdHr7pPtIxntCe3x039NSVTBIQ54WHNaWrfI0i83Lm61ak7Ss3qotJhwtIv0aUOUKS3DOz7jfL4Z4GHUjXgBmubeC_vuLHUVCutBsiZ4Jv4QxmWKy2sPlp-r2OgJlAPkcULvTu1GvXavRTrXb7PXkEKO4ErdBTvu2RyA6XLR1MKs0V7mRNvBfuiMRvWRuv9NBLh6vpgRTVo5Tthh-ahSQ-Rd6QcmvVNf-rLnxWGY4nOdx6QLcYKPukQourR2o6QzxGcpb9pDc8X0p2SEtDrDijpy6usFxk
MAC_ID=sij-mbp16
MAC_UN=sij
MAC_PW="Synchr0!"
#─── notes: ──────────────────────────────────────────────────────────────────────────────
#
#
# CF_TOKEN: a Cloudflare token. This is used on the cf router for quick
# deployment of new domains in tandem with Caddy and for ddns.
#
# VISUALCROSSING_API_KEY: used for obtaining weather forecasts. It is a very data-rich
# yet affordable source of weather info, with a generous free
# plan.
#
# ELEVENLABS_API_KEY: used when on the tts router if tts tasks are outsourced to
# the state-of-the-art models at ElevenLabs.
#
# COURTLISTENER_API_KEY: used primarily on the hooks router, but likely relevant only
# to legal professionals that will be aware what it is for.
#
# TIMING_API_URL: are used on the time router for generating various tasks
# & related to timekeeping, as well as on the notes router for
# TIMING_API_KEY: generating markdown-formatted timeslips. It requires an
# active subscription to the Timing app (macOS or web), but
# it's worth noting it comes included in the SetApp subscription
# bundle, for the same price, last I checked, as subscribing to
# Timing alone. If you have a Mac and somehow don't know this
# already, SetApp is an utterly insane value. I pay $15/mo for
# apps that I would otherwise pay ~$100/mo for if subscribing
# individually. I want to say I wasn't paid to say this, but
# with those savings I almost feel like I was.
#
# MAC_ID: These last three variables are for a specific use case where
# MAC_UN: you want certain commands run, or alerts appearing, on a
# MAC_PW: designated macOS computer. The alerts router is designed to
# deliver OS-level notifications to the specified Mac when a
# webhook gets a hit on specified keywords within the payload.
# Setting the MAC_ID to the TS_ID of the target Mac allows
# the script to readily know whether it itself is the target
# (this is relevant in a load-balancing context), and how to
# reach the target if not — to wit, ssh using MagicDNS.
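#
# For example (hypothetical, using the values above), the server could reach
# the target Mac over MagicDNS with:
#
#   ssh sij@sij-mbp16.starling-sailfin.ts.net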
```

47
requirements.txt Normal file

@@ -0,0 +1,47 @@
python-dotenv
setuptools
PyPDF2
fastapi
pdf2image
pdfminer
pytesseract
python-dateutil
python-docx
hypercorn
starlette
httpx
pydantic
pytz
requests
aiohttp
paramiko
tailscale
pandas
pydub
torch
selenium
webdriver_manager
faster_whisper
filetype
html2text
markdown
ollama
aiofiles
bs4
imbox
newspaper3k
python-magic
urllib3
whisper
huggingface_hub
numpy
tqdm
tiktoken
numba
scipy
vectordb
IPython
torchaudio
lxml
lxml_html_clean
pdfminer.six

61
setup.py Normal file

@@ -0,0 +1,61 @@
from setuptools import setup, find_packages
setup(
name='sijapi',
version='0.1',
packages=find_packages(),
entry_points={
'console_scripts': [
'sijapi = sijapi.__main__:main',
],
},
install_requires=[
'fastapi',
'python-dotenv',
'hypercorn',
'setuptools',
'PyPDF2',
'pdf2image',
'pdfminer',
'pytesseract',
'python-dateutil',
'python-docx',
'starlette',
'httpx',
'pydantic',
'pytz',
'requests',
'aiohttp',
'paramiko',
'tailscale',
'pandas',
'pydub',
'torch',
'selenium',
'webdriver_manager',
'faster_whisper',
'filetype',
'html2text',
'markdown',
'ollama',
'aiofiles',
'bs4',
'pdfminer.six',
'lxml_html_clean',
'imbox',
'newspaper3k',
'python-magic',
'urllib3',
'whisper',
'huggingface_hub',
'numpy',
'tqdm',
'tiktoken',
'numba',
'scipy',
'vectordb',
'IPython',
'torchaudio'
],
)

92
sij.asc Normal file

@@ -0,0 +1,92 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
mQINBGY+fL4BEADCpz8FAfa6/7i9mEQCYlwwP2k9DlrUzz+u9BL4BmuoTEcGty9M
7EA2ivRxXo371IIMjL/GyAa8I3WHMEhxuRlGldUQaHzo6PicTn+OiLJ/g2vCfStN
jIYog3WC25P7Es1n1hDuOu8rUL93twXZ4NevgYx+G44M7Q+/1AbSXf83kpawlHhg
HcGmH2vt9UulfTGAvN9s2sH2pn89812lpWLSdPARNw09ePZy4RdiEgJ6t+S+wjaE
Ue/H4FcQC1MLrQnkW5soUOduY9HN0iUk/xZqqkRQctl3ds5oInE483vQsL0HKFvs
MB8lBdXTbVzxvpFe+fvT8d6hiZ/YgxIUEl1KZLDd3atqj+UREuG+LABZUKC4nSUP
EXneXUqi4qVCW9827K9/H+IKahe8OE+OrZAsSfLtsp4AznIxgyQbvpUZzCuRASJN
Kt1cjcJBOv5L0HJ8tVykZd23WuKUXiyxTs1MxsDGyjew30IsAg4WNO/iw9vBO/Yu
pfjlZTcgbghdIuNmOrnCyzKWtUxxfDtWwEBBshbTKusOaGhauBaHrRVE7lKlTblM
x1JIzYBziDmFy25J1XvYb3guilk1yy54poLQaEcE54mQYWHKCNS4eQeL5dJR3Nmu
Pt9GXdMyNO3uyog3WYpyYqch+osbBsHFVNUClxMycnyqZzHQeZHPNJBzJwARAQAB
tC5TYW5neWUgSW5jZS1Kb2hhbm5zZW4gKEF0dG9ybmV5KSA8c2lqQHNpai5sYXc+
iQJXBBMBCABBAhsDBQkHhh8tBQsJCAcCBhUKCQgLAgQWAgMBAh4BAheAFiEEMjqK
LEezdiJLNhO3U1smWu2+W0QFAmY+fPUCGQEACgkQU1smWu2+W0SwBQ/+L5S1fIop
6iQ/6gQENBNCUVgACWP0/ViJzQGo4iF3UZkV5KV8pgk/TenZSXCLxUj6UpSAe25m
vtrGV4NCL2hLn1NPK11Na6IM1ykfh/L67NKeCqmtQYwNLwW0o0fvUpK9fahPxhmv
EFo+lVCabQndgzmLxnUhxH4qkGSejsaSFoJQ6fVl/DExCL4w/R5rStnRMKDtkuF1
ONfjZpuLrAylx8Ypf/rocQYn5AJcRD5ZL2bGgDZNe85VNBFmD3b2cGSVpm3J6Rg/
fPfs1lgtpgXWbBDCF8nRY326Utbr3qoeZUXVQjVZ05Q2SpUYFHiDZJ3EFwQikg5n
cIBfcXQZQhTq/OK0eS0vB1li8m1ce9m8iMC+Pxe5toPkxFV5RO1+o5PG1SyOfzfV
F1c0O9JQqdJzRHoTuqLtVhlmRVBU2d6TjWYlZ6TwPShSTLu0Tkm4EeFJS4oag75d
q7LlIIvrWS4n3CqVpC/PEIUtclytkOkvNQaSWHEVkappS3UjkX1BJmaI8zXYh9jh
sV/5FckvwYnky+w6geFOBs34NW0rg9oNw4KNAywYcOPbI/Ev1z57my+MpA5msw+B
ww9sFC+tzQCSJl0FU2Dg2YMnyqfUtGr9HfXdAGuuUVh+cYFmEdwwZqBWl37pNIGL
SxfF1AdrlHCSpJcLVETe80UraMFAI7tyOwe0L1Nhbmd5ZSBJbmNlLUpvaGFubnNl
biA8c2FuZ3llaWpAd2VzdGVybmxhdy5vcmc+iQJUBBMBCAA+FiEEMjqKLEezdiJL
NhO3U1smWu2+W0QFAmY+fOgCGwMFCQeGHy0FCwkIBwIGFQoJCAsCBBYCAwECHgEC
F4AACgkQU1smWu2+W0RlnBAArwaFta9NTRdubTqctv1EET1D9OXAE/R5vdSk2jRQ
1CMYmv6KeMm0Rl7+dNFet/vJOEtITF7TZHnt7WBy7n5m+SIoARsaZYEchjZKsE2g
6RvRWqFGYuUYQWTRKsw0b2tT16BaNLKdV/w3ndRQNS6wDJrW1dRnIWxm4z26d3/H
Rt3o8+LUVxdSWGLliKZU00S+FNPVSwWe/X7+CoIE7T5XZL+OIEJ6DfpK2pkHKT6D
FswF3KOLG36vz5eISk4AT+o9AEoFIpX0hce3DMixEYQSgKN230K8RchC59bO81zE
w7Mic4vpn/wKFhicn+0BA1aJzzOd8iEwiA0p5baq4b2xIwCBiO4uv/HXR1SN1Tfk
QozjAGzl8LzrmwGTWOtOSk/7ckPhPR2MGNhMdtJ7rPeHxImJLh+/f4uBmYnQUdw4
0j3sMpJmrShW5dXJ8YHqVFfqabYD8HkBztdYI0qGJDpQjEbW6V+DvMWQXOZ8c1ul
NN2vZyY25RkypMQLiphImJa+q6eGtBEas40MeAkgQKIBPBBpb6W1km+m6UnOADKB
0/vOWcZMgijyMPp7WvwXbOwmXI27rHsUTvhFDLPI113a9I5bU8j6VyW2s/sst3Xc
OQDzEgR3KvD4dWjczIg6yliIq9eM5hskpsYyfDfWRWrIbR3Tg8XPwnQRB9dPEHIy
rKS0KVNhbmd5ZSBJbmNlLUpvaGFubnNlbiA8c2FuZ3llQHJpc2V1cC5uZXQ+iQJU
BBMBCAA+FiEEMjqKLEezdiJLNhO3U1smWu2+W0QFAmY+fQYCGwMFCQeGHy0FCwkI
BwIGFQoJCAsCBBYCAwECHgECF4AACgkQU1smWu2+W0SKGA//VRGpS7IwOOlHF7OI
+LEMDebLpLB2PswfWrK+sI9YdXXV/CaT0NcCz2HPCoK+coaDkl4cxh10ykVbjO36
wZc/rvhpzga2wMLpBLNwpTvSlfMwsCQeRQay498bgdR59gf1hYa/dPYKKrBgNxHa
Kc3dMDWU0adpV4zV1s/iFNQQZfmhUah+8TTlB03hahPzn8V7CqQF+jTfSXiWPv/V
eD1W6Sc1juvLTVxTThbM5ewiIhMP2t7KM+M4viOEqce79IcE2HTcpCaEI7Lh/Eld
9VBZZk/gENuPqyQuLbOIOQhC6LYRZkZC9Vv1FDutfWV5ZBPyaTY/n5pGW3lo+Tfa
FLSamQcD6dyiGm/ZyQbPUDt2aWhqRGr7VvvtfyXLazL9T9Y6ASr5UjLakPr5ihUz
B8InRch9ACPbu7QSIGFk9PQgHme2Cd/HMRLIALnkAmrafgDE+14Rlp9qI2nYhWdD
jkZcLalPXQCDBxUfj1q192Nn3wlKsDkDd2RWT7Mc2RJq2FR36KADPMtz2oJPSib4
eRgI40E9Wv+zqHDDTU2K/bLi3nmBHvKnXWXPyiBPVL+CAoAhkYHHJwNuRQfxlukq
heS4/CMBRB04foTeu2ltl6/sQdAIyBGKbOC6fMyhJFYbi16nWI6j7iw2XQnqyitu
jC8Pz14NfIAQTpKCVcV32Kn2k1+0I1Nhbmd5ZSBJbmNlLUpvaGFubnNlbiA8c2lq
QGVudi5lc3E+iQJUBBMBCAA+FiEEMjqKLEezdiJLNhO3U1smWu2+W0QFAmY+fRIC
GwMFCQeGHy0FCwkIBwIGFQoJCAsCBBYCAwECHgECF4AACgkQU1smWu2+W0Rbxw/+
OMYnlyXvo146+3M6JGdvW36CWmc9ZcmaU+xJM3FnG91WNo5J8MnHl0Ks9BwjNWtm
VJgFEdi2EVpSLJnYdQyJILCNt8RAclYvbFHYUOIDEEC2yr5ZKt/odwYAXPxaqQ4O
Sj7R2GbLA52O8zGWfARBAnAQycrlBRjItdpzGeWgRST8O/ot/IkU7xsAKW72E2VB
9jlCahp5c01lEideVqzVhk3z6GzVz1NUKsglgEOmTIjld4mMs+4GX/93q0u1erKO
I7Q6RL6lfdc2opGi5jFMXGWhLLgX2SSsBFJRuSQGnTpbx3XWFS5uA+cku7Fh0fC0
MKr2vsY18Z6OqU0MdQm6ovIVcvhzIdGfnBU9Ct98DMiUhDCmx3o9XneWj1n7kWKM
gT8s8AvE27tidtkZApwIKHdUy6qfyqwRjxE+KdL6Eh48x3TVYep+wfSfPJ1eq9Ne
7WWXKUx6FGNH01hpQdTLbCYqmwMa03och1wwyi+0wc8rHe6k6y2tURtP3mINkDeV
u1QmVaGRDA2r7oDm9UsFeupGsbFBnTkQIfJgnrLRJFfN2FDJPZDcd/VS71AOSL5C
jY+Dr/WHYPWeN8MHXfG4r/P41wsrnAJEAzSvLRQ9GYCLPe825W+uDJx9eMePodFa
BeIBcM633WXpbIXHnRQhPDfTzejCejO6GoPE7PbtBBi5Ag0EZj58vgEQAPUqNOgQ
kAPd/S+nkGAfvnzC5UD6lVWaQTVL/xU2K1J8l11K5Ck4nq7oMKhzEitu0lA+jG7q
JVwXMj9+rnoGlbIYmmxCZYpSit930Mss3HjYU8IAF4nybGwc5+wO77cldk3WJTI0
EkFgiM4Jk6Gk/tRf1LgMIfJIUgm8MooPLqg2z5Pj+bbwxw42A20enEwtF3ivEETJ
wuJwsp5uCOAfzOGqqBvp19PMTPynUBuwEXCkJfb0CCz+5yhjoi6ZjCVXxjuoe2wN
jFwoYd8odfSuvC6Fh9qqXnjF7HZLxEyN7K1L/y/sWarsN01zbUUI3kZlnTuamDu4
LdZtl2q3QqDyxmzHIWLTa1qL0s3WooB7JJqBYaNmQjLHadoktZ4vfhl7kjXYsg+i
84oipL83u2cRHplpqnRk9qVwNdW01EObjNafWY6t3942sM4e/yOdQiaXlxivPuHV
VYwme6K53lmGcV3ipMWRpNkme+oKV/TdYTTdlDaLgC8ga5AW6poNoSp5UpNeOs0E
mxIZivpRQSCr3g+jScy0RdX/+tI1gWe+2ZIHFwR+1WsXvLXHyd1wVyH4vDxSf1bE
VRVsXLZDT/xMGDzNzAC76kzoIykrcndFiTbNzB/LjZJuls6fRdN07bTcymWEKYiP
Ia6iGdag6+ueoX4eDzbjCvldKtkfr/EhB7MfABEBAAGJAjwEGAEIACYWIQQyOoos
R7N2Iks2E7dTWyZa7b5bRAUCZj58vgIbDAUJB4YfLQAKCRBTWyZa7b5bRLZdEACk
AaXNVeywC9+X6bdwkKV5Jl6Hv238cGd58TuVbjd+tii1JazbKEqCAr5tTlGtrUZg
fyjM0z5sMKDSZ15paX4xDbDs+xdfMxLVdjmFlZgwTrrTSIx3ODxPo/sSeyrzGZrQ
hlZjOHP1Bvln0OTQwK0yE3Eaip0FhIpJA5FX3yrZfvza3St5leNOXsZgEri68cgf
mVhS9tBD2I9TpCVwgq5vRnloAMgtQBYr8N9glXBfs2WsPhU96HSSH88osJW+lCkG
vTtzQBEjnnSQ/ssHBYz4DfpsJe1fbM+9WVow6q2nkUhqg5TfdAt4H0ra2uPXnNz8
lvQObVHlw7T0w5UTzgBdlCyYplyTG2gcZi+UWzit6YH9DH82j1otcq3+3NlrKwo0
TSJKZNagiqgJNZ1mhJQTt3JDacFFkBBxLf6trruuyInRU1leo87hzHCxIlMbQPqh
ogtV+W9FHElVJwoTQi8YF+0AacZPzK8wJmlPLxBeqs+ULJ8H5wZxlEBB1Jj91/W9
6R8m2IUZCsXNNpYU+f7uB8x0RUS3pU8S7GcwdJmOa16Xc4VdfWugm4TTEtajeSYC
ek5j/2s/QkAum5slT2Y6Aam0Jj/IhsGHKVEnR6DS01mZqVeeu0giPFUO4ZX5C0n9
mAmw/ZUGIOj6ls3KMBHv4pqQI7nd00tW8eIMgKGgKQ==
=PhPl
-----END PGP PUBLIC KEY BLOCK-----

241
sijapi/__init__.py Normal file

@@ -0,0 +1,241 @@
import os
from pathlib import Path
import ipaddress
import multiprocessing
from dotenv import load_dotenv
from dateutil import tz
from pydantic import BaseModel
import traceback
import logging
from . import logs
### Logs ###
HYPERCORN_LOG_LEVEL = None
LOGGER = logging.getLogger('LOGGER')
def DEBUG(d): LOGGER.debug(d)
def INFO(i): LOGGER.info(i)
def WARN(w): LOGGER.warning(w)
def ERR(e):
LOGGER.error(e)
LOGGER.error(traceback.format_exc())
def CRITICAL(c):
LOGGER.critical(c)
LOGGER.critical(traceback.format_exc())
# from sijapi.config.config import load_config
# cfg = load_config()
### Initial initialization
BASE_DIR = Path(__file__).resolve().parent
CONFIG_DIR = BASE_DIR / "config"
ENV_PATH = CONFIG_DIR / ".env"
load_dotenv(ENV_PATH)
### API essentials
ROUTERS = os.getenv('ROUTERS', '').split(',')
PUBLIC_SERVICES = os.getenv('PUBLIC_SERVICES', '').split(',')
GLOBAL_API_KEY = os.getenv("GLOBAL_API_KEY")
# HOST_NET and HOST_PORT comprise HOST, which is what the server will bind to
HOST_NET = os.getenv("HOST_NET", "127.0.0.1")
HOST_PORT = int(os.getenv("HOST_PORT", 4444))
HOST = f"{HOST_NET}:{HOST_PORT}"
LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
TRUSTED_SUBNETS = [ipaddress.ip_network(subnet.strip()) for subnet in os.getenv('TRUSTED_SUBNETS', '127.0.0.1/32').split(',')]
MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count())
### Directories & general paths
HOME_DIR = Path.home()
ROUTER_DIR = BASE_DIR / "routers"
DATA_DIR = BASE_DIR / "data"
os.makedirs(DATA_DIR, exist_ok=True)
ALERTS_DIR = DATA_DIR / "alerts"
os.makedirs(ALERTS_DIR, exist_ok=True)
LOGS_DIR = BASE_DIR / "logs"
os.makedirs(LOGS_DIR, exist_ok=True)
REQUESTS_DIR = LOGS_DIR / "requests"
os.makedirs(REQUESTS_DIR, exist_ok=True)
REQUESTS_LOG_PATH = LOGS_DIR / "requests.log"
### Databases
DB = os.getenv("DB", 'sijdb')
DB_HOST = os.getenv("DB_HOST", "127.0.0.1")
DB_PORT = os.getenv("DB_PORT", 5432)
DB_USER = os.getenv("DB_USER", 'sij')
DB_PASS = os.getenv("DB_PASS")
DB_SSH = os.getenv("DB_SSH", "100.64.64.15")
DB_SSH_USER = os.getenv("DB_SSH_USER")
DB_SSH_PASS = os.getenv("DB_SSH_PASS")
DB_URL = f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB}'
### LOCATE AND WEATHER LOCALIZATIONS
USER_FULLNAME = os.getenv('USER_FULLNAME')
USER_BIO = os.getenv('USER_BIO')
TZ = tz.gettz(os.getenv("TZ", "America/Los_Angeles"))
HOME_ZIP = os.getenv("HOME_ZIP") # unimplemented
LOCATION_OVERRIDES = DATA_DIR / "loc_overrides.json"
LOCATIONS_CSV = DATA_DIR / "US.csv"
# DB = DATA_DIR / "weatherlocate.db" # deprecated
VISUALCROSSING_BASE_URL = os.getenv("VISUALCROSSING_BASE_URL", "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline")
VISUALCROSSING_API_KEY = os.getenv("VISUALCROSSING_API_KEY")
### Obsidian & notes
OBSIDIAN_VAULT_DIR = Path(os.getenv("OBSIDIAN_BASE_DIR") or HOME_DIR / "Nextcloud" / "notes")
OBSIDIAN_JOURNAL_DIR = OBSIDIAN_VAULT_DIR / "journal"
OBSIDIAN_RESOURCES_DIR = "obsidian/resources"
OBSIDIAN_BANNER_DIR = f"{OBSIDIAN_RESOURCES_DIR}/banners"
os.makedirs(Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_BANNER_DIR, exist_ok=True)
OBSIDIAN_BANNER_SCENE = os.getenv("OBSIDIAN_BANNER_SCENE", "wallpaper")
OBSIDIAN_CHROMADB_COLLECTION = os.getenv("OBSIDIAN_CHROMADB_COLLECTION", "obsidian")
DOC_DIR = DATA_DIR / "docs"
os.makedirs(DOC_DIR, exist_ok=True)
### DATETIME SCHEMA FOR DAILY NOTE FOLDER HIERARCHY FORMATTING ###
YEAR_FMT = os.getenv("YEAR_FMT")
MONTH_FMT = os.getenv("MONTH_FMT")
DAY_FMT = os.getenv("DAY_FMT")
DAY_SHORT_FMT = os.getenv("DAY_SHORT_FMT")
### Large language model
LLM_URL = os.getenv("LLM_URL", "http://localhost:11434")
LLM_SYS_MSG = os.getenv("SYSTEM_MSG", "You are a helpful AI assistant.")
SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the summary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and be added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
DEFAULT_LLM = os.getenv("DEFAULT_LLM", "dolphin-mistral")
DEFAULT_VISION = os.getenv("DEFAULT_VISION", "llava")
DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "Luna")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
### Stable diffusion
SD_IMAGE_DIR = DATA_DIR / "sd" / "images"
os.makedirs(SD_IMAGE_DIR, exist_ok=True)
SD_WORKFLOWS_DIR = DATA_DIR / "sd" / "workflows"
os.makedirs(SD_WORKFLOWS_DIR, exist_ok=True)
COMFYUI_URL = os.getenv('COMFYUI_URL', "http://localhost:8188")
COMFYUI_DIR = Path(os.getenv('COMFYUI_DIR'))
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output'
COMFYUI_LAUNCH_CMD = os.getenv('COMFYUI_LAUNCH_CMD', 'mamba activate comfyui && python main.py')
### Summarization
SUMMARY_CHUNK_SIZE = int(os.getenv("SUMMARY_CHUNK_SIZE", 4000)) # measured in tokens
SUMMARY_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", 100)) # measured in tokens
SUMMARY_TPW = float(os.getenv("SUMMARY_TPW", 1.3)) # measured in tokens
SUMMARY_LENGTH_RATIO = int(os.getenv("SUMMARY_LENGTH_RATIO", 4)) # measured as original to length ratio
SUMMARY_MIN_LENGTH = int(os.getenv("SUMMARY_MIN_LENGTH", 150)) # measured in tokens
SUMMARY_INSTRUCT = os.getenv("SUMMARY_INSTRUCT", "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
SUMMARY_MODEL = os.getenv("SUMMARY_MODEL", "llama3")
SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 4096))
### ASR
ASR_DIR = DATA_DIR / "asr"
os.makedirs(ASR_DIR, exist_ok=True)
WHISPER_CPP_DIR = HOME_DIR / str(os.getenv("WHISPER_CPP_DIR"))
WHISPER_CPP_MODELS = os.getenv('WHISPER_CPP_MODELS', 'NULL,VOID').split(',')
### TTS
PREFERRED_TTS = os.getenv("PREFERRED_TTS", "None")
TTS_DIR = DATA_DIR / "tts"
os.makedirs(TTS_DIR, exist_ok=True)
VOICE_DIR = TTS_DIR / 'voices'
os.makedirs(VOICE_DIR, exist_ok=True)
PODCAST_DIR = TTS_DIR / "sideloads"
os.makedirs(PODCAST_DIR, exist_ok=True)
TTS_OUTPUT_DIR = TTS_DIR / 'outputs'
os.makedirs(TTS_OUTPUT_DIR, exist_ok=True)
TTS_SEGMENTS_DIR = TTS_DIR / 'segments'
os.makedirs(TTS_SEGMENTS_DIR, exist_ok=True)
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
### Calendar & email account
MS365_TOGGLE = os.getenv("MS365_TOGGLE") == "True"
ICAL_TOGGLE = os.getenv("ICAL_TOGGLE") == "True"
ICS_PATH = DATA_DIR / 'calendar.ics' # deprecated now, but maybe revive?
ICALENDARS = os.getenv('ICALENDARS', 'NULL,VOID').split(',')
class IMAP_DETAILS(BaseModel):
email: str
password: str
host: str
imap_port: int
smtp_port: int
imap_encryption: str = None
smtp_encryption: str = None
IMAP = IMAP_DETAILS(
email = os.getenv('IMAP_EMAIL'),
password = os.getenv('IMAP_PASSWORD'),
host = os.getenv('IMAP_HOST', '127.0.0.1'),
imap_port = int(os.getenv('IMAP_PORT', 1143)),
smtp_port = int(os.getenv('SMTP_PORT', 469)),
imap_encryption = os.getenv('IMAP_ENCRYPTION', None),
smtp_encryption = os.getenv('SMTP_ENCRYPTION', None)
)
AUTORESPONSE_WHITELIST = os.getenv('AUTORESPONSE_WHITELIST', '').split(',')
AUTORESPONSE_BLACKLIST = os.getenv('AUTORESPONSE_BLACKLIST', '').split(',')
AUTORESPONSE_BLACKLIST.extend(["no-reply@", "noreply@", "@uscourts.gov", "@doi.gov"])
AUTORESPONSE_CONTEXT = os.getenv('AUTORESPONSE_CONTEXT', None)
AUTORESPOND = AUTORESPONSE_CONTEXT is not None
### Courtlistener & other webhooks
COURTLISTENER_DOCKETS_DIR = DATA_DIR / "courtlistener" / "dockets"
os.makedirs(COURTLISTENER_DOCKETS_DIR, exist_ok=True)
COURTLISTENER_SEARCH_DIR = DATA_DIR / "courtlistener" / "cases"
os.makedirs(COURTLISTENER_SEARCH_DIR, exist_ok=True)
CASETABLE_PATH = DATA_DIR / "courtlistener" / "cases.json"
COURTLISTENER_API_KEY = os.getenv("COURTLISTENER_API_KEY")
COURTLISTENER_BASE_URL = os.getenv("COURTLISTENER_BASE_URL", "https://www.courtlistener.com")
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
### Keys & passwords
PUBLIC_KEY_FILE = os.getenv("PUBLIC_KEY_FILE", 'you_public_key.asc')
PUBLIC_KEY = (BASE_DIR.parent / PUBLIC_KEY_FILE).read_text()
MAC_ID = os.getenv("MAC_ID")
MAC_UN = os.getenv("MAC_UN")
MAC_PW = os.getenv("MAC_PW")
TIMING_API_KEY = os.getenv("TIMING_API_KEY")
TIMING_API_URL = os.getenv("TIMING_API_URL", "https://web.timingapp.com/api/v1")
PHOTOPRISM_URL = os.getenv("PHOTOPRISM_URL")
PHOTOPRISM_USER = os.getenv("PHOTOPRISM_USER")
PHOTOPRISM_PASS = os.getenv("PHOTOPRISM_PASS")
### Tailscale
TS_IP = ipaddress.ip_address(os.getenv("TS_IP", "NULL"))
TS_SUBNET = ipaddress.ip_network(os.getenv("TS_SUBNET")) if os.getenv("TS_SUBNET") else None
TS_ID = os.getenv("TS_ID", "NULL")
TS_TAILNET = os.getenv("TS_TAILNET", "NULL")
TS_ADDRESS = f"http://{TS_ID}.{TS_TAILNET}.ts.net"
### Cloudflare
CF_API_BASE_URL = os.getenv("CF_API_BASE_URL")
CF_TOKEN = os.getenv("CF_TOKEN")
CF_IP = DATA_DIR / "cf_ip.txt" # to be deprecated soon
CF_DOMAINS_PATH = DATA_DIR / "cf_domains.json" # to be deprecated soon
### Caddy - not fully implemented
BASE_URL = os.getenv("BASE_URL")
CADDY_SERVER = os.getenv('CADDY_SERVER', None)
CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None
CADDY_API_KEY = os.getenv("CADDY_API_KEY")
### Microsoft Graph
MS365_CLIENT_ID = os.getenv('MS365_CLIENT_ID')
MS365_SECRET = os.getenv('MS365_SECRET')
MS365_TENANT_ID = os.getenv('MS365_TENANT_ID')
MS365_CERT_PATH = CONFIG_DIR / 'MS365' / '.cert.pem' # deprecated
MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated
MS365_KEY = MS365_KEY_PATH.read_text()
MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt'
MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT')
MS365_LOGIN_URL = os.getenv("MS365_LOGIN_URL", "https://login.microsoftonline.com")
MS365_AUTHORITY_URL = f"{MS365_LOGIN_URL}/{MS365_TENANT_ID}"
MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect")
MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access').split(',')
### Maintenance
GARBAGE_COLLECTION_INTERVAL = 60 * 60 # Run cleanup every hour
GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours

133
sijapi/__main__.py Executable file

@@ -0,0 +1,133 @@
#!/Users/sij/miniforge3/envs/api/bin/python
from fastapi import FastAPI, Request, HTTPException, Response
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import ClientDisconnect
from hypercorn.asyncio import serve
from hypercorn.config import Config
import sys
import asyncio
import httpx
import argparse
import json
import ipaddress
import importlib
from dotenv import load_dotenv
from pathlib import Path
from datetime import datetime
from . import logs
parser = argparse.ArgumentParser(description='Personal API.')
parser.add_argument('--debug', action='store_true', help='Set log level to DEBUG')
parser.add_argument('--test', type=str, help='Load only the specified module.')
args = parser.parse_args()
logs.setup("debug")
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOST, ENV_PATH, GLOBAL_API_KEY, REQUESTS_DIR, ROUTER_DIR, REQUESTS_LOG_PATH, PUBLIC_SERVICES, TRUSTED_SUBNETS, ROUTERS
# Initialize a FastAPI application
api = FastAPI()
# CORSMiddleware
api.add_middleware(
CORSMiddleware,
allow_origins=['*'],
allow_credentials=True,
allow_methods=['*'],
allow_headers=['*'],
)
class SimpleAPIKeyMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: Request, call_next):
client_ip = ipaddress.ip_address(request.client.host)
if request.method == "OPTIONS":
# Allow CORS preflight requests
return JSONResponse(status_code=200)
if request.url.path not in PUBLIC_SERVICES:
if not any(client_ip in subnet for subnet in TRUSTED_SUBNETS):
api_key_header = request.headers.get("Authorization")
api_key_query = request.query_params.get("api_key")
if api_key_header:
api_key_header = api_key_header.lower().split("bearer ")[-1]
if api_key_header != GLOBAL_API_KEY and api_key_query != GLOBAL_API_KEY:
WARN(f"Invalid API key provided by a requester.")
return JSONResponse(
status_code=401,
content={"detail": "Invalid or missing API key"}
)
response = await call_next(request)
# DEBUG(f"Request from {client_ip} is complete")
return response
api.add_middleware(SimpleAPIKeyMiddleware)
canceled_middleware = """
@api.middleware("http")
async def log_requests(request: Request, call_next):
DEBUG(f"Incoming request: {request.method} {request.url}")
DEBUG(f"Request headers: {request.headers}")
DEBUG(f"Request body: {await request.body()}")
response = await call_next(request)
return response
async def log_outgoing_request(request):
INFO(f"Outgoing request: {request.method} {request.url}")
DEBUG(f"Request headers: {request.headers}")
DEBUG(f"Request body: {request.content}")
"""
@api.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
ERR(f"HTTP Exception: {exc.status_code} - {exc.detail}")
ERR(f"Request: {request.method} {request.url}")
return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
@api.middleware("http")
async def handle_exception_middleware(request: Request, call_next):
try:
response = await call_next(request)
except RuntimeError as exc:
if str(exc) == "Response content longer than Content-Length":
# Update the Content-Length header to match the actual response content length
response.headers["Content-Length"] = str(len(response.body))
else:
raise
return response
def load_router(router_name):
router_file = ROUTER_DIR / f'{router_name}.py'
DEBUG(f"Attempting to load {router_name.capitalize()}...")
if router_file.exists():
module_path = f'sijapi.routers.{router_name}'
try:
module = importlib.import_module(module_path)
router = getattr(module, router_name)
api.include_router(router)
INFO(f"{router_name.capitalize()} router loaded.")
except (ImportError, AttributeError) as e:
CRITICAL(f"Failed to load router {router_name}: {e}")
else:
WARN(f"Router file for {router_name} does not exist.")
def main(argv):
if args.test:
load_router(args.test)
else:
CRITICAL(f"sijapi launched")
CRITICAL(f"{args._get_args}")
for router_name in ROUTERS:
load_router(router_name)
config = Config()
config.keep_alive_timeout = 1200
config.bind = [HOST]
asyncio.run(serve(api, config))
if __name__ == "__main__":
main(sys.argv[1:])

253
sijapi/config/.config.yaml Normal file

@@ -0,0 +1,253 @@
TZ: 'America/Los_Angeles'
API:
BIND: 0.0.0.0
PORT: 4444
URL: https://api.sij.ai
PUBLIC:
- /id
- /ip
- /health
- /img/
- /cl/dockets
- /cl/search
- /cd/alert
TRUSTED_SUBNETS:
- 127.0.0.1/32
- 10.13.37.0/24
- 100.64.64.0/24
ROUTER:
asr: ON
calendar: ON
cf: OFF
email: ON
health: ON
hooks: ON
ig: OFF
llm: ON
locate: ON
note: ON
sd: ON
serve: ON
summarize: ON
time: ON
tts: ON
weather: ON
## DO NOT MODIFY: ##
ENV: '{{ DIR.CONFIG }}/.env' # auto-configured
KEYS: ['{{ SECRET.GLOBAL }}'] # sourced from .env
## DO NOT MODIFY: ##
DIR:
BASE: '{{ AUTO.BASE_DIR }}'
ROUTERS: '{{ DIR.BASE }}/routers'
DATA: '{{ DIR.BASE }}/data'
CONFIG: '{{ DIR.BASE }}/config'
LOGS: '{{ DIR.BASE }}/logs'
ALERTS: '{{ DIR.DATA }}/alerts'
REQUESTS: '{{ DIR.DATA }}/requests'
REQUESTS_LOG: '{{ DIR.LOGS }}/requests.log'
HOST:
## DO NOT MODIFY: ##
TS:
IP: '{{ AUTO.IP }}' # auto-configured
ID: '{{ AUTO.TS_ID }}' # auto-configured
MAX_CPU_CORES: '{{ MAX_CPU_CORES }}' # auto-configured
CADDY:
TS:
IP: 100.64.64.15
ID: 'sij-namecheap-vps'
CADDYFILE: '/etc/caddy/Caddyfile'
LOGS: '/etc/caddy/logs'
TS:
ID: sij-mbp16
IP: 100.64.64.20
SUBNET: 100.64.64.0/24
MDNS: starling-sailfin.ts.net
## DO NOT MODIFY: ##
API_KEY: '{{ SECRET.TS }}' # sourced from .env
DB:
NAME: sij
HOST: 127.0.0.1
PORT: 5432
USER: sij
SSH_USER: sij
## DO NOT MODIFY: ##
SSH_PASS: '{{ SECRET.DB_SSH }}' # sourced from .env
URL: 'postgresql://{{ DB.USER }}:{{ SECRET.DB }}@{{ DB.HOST }}:{{ DB.PORT }}/{{ DB.NAME }}' # auto-configured
OBSIDIAN:
DAILY_NOTE:
YEAR: '%Y'
MONTH: '%Y-%m %B'
DAY: '%Y-%m-%d %A'
DAY_SHORT: '%Y-%m-%d'
DIR: '{{ HOME_DIR }}/Nextcloud/notes' # you can specify the absolute path or use '{{ HOME_DIR }}' followed by a relative path
MS365:
STATUS: OFF
AUTH:
TENANT: bad78048-a6e0-47b1-a24b-403c444aa349
CLIENT_ID: ce8cbd24-f146-4dc7-8ee7-51d9b69dec59
LOGIN: 'https://login.microsoftonline.com'
REDIRECT: 'https://api.sij.ai/o365/oauth_redirect'
SCOPES:
- basic
- calendar_all
- Calendars.Read
- Calendars.ReadWrite
- offline_access
## DO NOT MODIFY: ##
SECRET: '{{ SECRET.M365_SECRET }}' # sourced from .env
TOKEN_FILE: '{{ DIR.CONFIG }}/ms365/oauth_token.txt' # auto-configured
ICAL:
STATUS: ON
CALENDARS:
- ''
EMAIL:
STATUS: ON
FULLNAME: 'Sangye Ince-Johannsen'
IMAP:
STATUS: ON
EMAIL: 'sij@sij.law'
HOST: 127.0.0.1
PORT: 1142
ENCRYPTION: STARTTLS
## DO NOT MODIFY: ##
PASSWORD: '{{ SECRET.IMAP }}' # sourced from .env
SMTP:
STATUS: ON
EMAIL: 'sij@sij.law'
HOST: 127.0.0.1
PORT: 1024
SMTP_ENCRYPTION: SSL
PASSWORD: '{{ SECRET.SMTP }}' # sourced from .env
SUMMARY:
INSTRUCT: 'You are an AI assistant that provides email summaries for Sanjay -- nothing more and nothing less. You must not include ANY extraneous text other than the summary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary. Your response will undergo Text-To-Speech conversion and be added to Sanjay''s private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following.'
AUTORESPONSE:
INSTRUCT: ''
CONTEXT: 'he is taking a leave of absence until July 20, 2024 and will be unable to respond personally to emails received before then.'
USER_BIO: 'a public interest environmental lawyer based in Eugene who works at the Western Environmental Law Center and specializes in Endangered Species Act litigation.'
WHITELIST:
- sangye@
- sij@
- singdancer@
- singdancer8@
- vanessa.nowitzky@
- theo.killian@
- singdancing
- synchronicity
BLACKLIST:
- '@westernlaw'
- pete
- sarah
- mcmillan
- erik
- schlenker
- esg
- sristi
- cascadia
- csnm
- smwc
- sodamtn@
- '@cascadia'
- oregonwild
TIMING:
URL: https://web.timingapp.com/api/v1
KEY: '{{ SECRET.TIMING }}'
LLM:
STATUS: ON
URL: http://localhost:11434
SYS: 'You are a helpful AI assistant.'
TPW: 1.3
CHAT:
MODEL: dolphin-mistral
VISION:
MODEL: llava-llama3
DEFAULT_SUMMARY: dolphin-mistral
SUMMARY:
MODEL: llama3
CHUNK_SIZE: 4000
CHUNK_OVERLAP: 100
LENGTH_RATIO: 4
MIN_LENGTH: 150
TOKEN_LIMIT: 4096
INSTRUCT: 'You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the summary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.'
ASR:
STATUS: ON
DIR: 'whisper.cpp'
MODELS:
- small
- base
- base-en
- tiny
- medium
- medium-en
- large
- large-v2
- large-v3
TTS:
STATUS: ON
USE:
DEFAULT: XTTS
EMAIL: XTTS
WEBCLIP: 11L
RSS: XTTS
XTTS:
STATUS: ON
DEFAULT_VOICE: '{{ CONFIG.XTTS_DEFAULT_VOICE }}'
11L:
DEFAULT_VOICE: '{{ CONFIG.ELEVENLABS_DEFAULT_VOICE }}'
STATUS: '{{ STATUS.11L }}'
API_KEY: '{{ SECRET.ELEVENLABS }}'
SD:
START_COMMAND: '{{ CONFIG.COMFYUI_START_COMMAND }}'
URL: '{{ CONFIG.COMFYUI_URL }}'
DIR: '{{ HOME_DIR }}/workshop/sd/ComfyUI'
CONFIG: '{{ CONFIG_DIR }}/sd_config.yaml'
CF:
URL: 'https://api.cloudflare.com/client/v4'
TOKEN: '{{ SECRET.CLOUDFLARE }}'
CL:
URL: https://www.courtlistener.com
API_KEY: '{{ SECRET.COURTLISTENER }}'
DOCKETS: '{{ DIR.DATA }}/cl/dockets'
SEARCHES: '{{ DIR.DATA }}/cl/searches'
VC:
URL: 'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline'
API_KEY: '{{ SECRET.VISUAL_CROSSING }}'
SSH:
PRIVATE_KEY: '{{ SECRET.SSH_PRIVATE }}'
PUBLIC_KEY: '{{ SECRET.SSH_PUBLIC }}'
MAC_HOST: '{{ SECRET.MAC_HOST }}'
MAC_UN: '{{ SECRET.MAC_USERNAME }}'
MAC_PW: '{{ SECRET.MAC_PASSWORD }}'
PGP:
PRIVATE_KEY: '{{ SECRET.PGP_PRIVATE }}'
PUBLIC_KEY: '{{ SECRET.PGP_PUBLIC }}'
CREATE_DIRS:
- '{{ DIR.LOGS }}'
- '{{ DIR.ALERTS }}'
- '{{ DIR.REQUESTS }}'
- '{{ DIR.DOCKETS }}'


@@ -0,0 +1,28 @@
-----BEGIN PRIVATE KEY-----
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCikW67UW0RpncJ
h4Ha9HumZ/WzgEZWRWkgksVJIOJ8t1PftctizLUlz+xMWNl+Volp4crxnPpnczis
pOXU4g65XoFHHpF9nhF/3YDgxo5BDEM/mIIKEO9LFkIBQVBdE85qXnIVot5MfuNj
HeyEs7doRMBxilOSR/DkT8bTWu7m5yeHlF58iYVOxxssGhP3bo7CcAcaZD1LJBnP
Df+UBqzWQ9as903p5bFixHy2kVz8Qkd5k5tyIQ/tXqlhRfLLHG4AYHmBbS06CAg0
nEpKUeQx4l1J/ykAjQTwhHf70xv1z0p28mHcr5ib4UvpYK9fMM6FKWenwlqA3qrK
zQUJQ7E/AgMBAAECggEAQ5H/XIxzsSpnv+Y66y9DVd9QGNPwaFthXtCif8rTWNM6
YXnGl8JOaPELXpBvljuR0hivqc19pxIVNG01uk5boGDPiygBgRz6WRNQRh1Bc3gN
W5mgM17ml2cg+DSVmppo6X1oHeYcT99N1BzT+jRYv1YURx0fr2WHkt413hOlyQMR
b8ir/TOBx3olg4KBPDuysRC5BCIr3Mkz4jsh+9wVIOReKVezsy7nxJVzipcxOyZO
9VGgvlw4XLrRTOJEv4e3ldcg219j5KEGsJ4FFSziSmpj5fN4Vt+JmY7nueSHyL6P
3hX52lRfOcTXTEeiEV2cXkm3h8uQ3zfiZRYi3P0DQQKBgQDXGBZc3WnfXim0u1EV
JzZFwxBS7SHkyGgnd50ak6e9yDbdxOuYIOo4mBlc3ofd20EfT4TvR7Xyw+PD2fWJ
+isdwCEb9JZZ1H6RDGIzSDYXGNeGId4kMKBZdmKpEeLgStihsrYp/nxtwcE/8A7N
jCEKZj1ld7QfbQlGT/NJ4Jj80wKBgQDBfBpth6vMyCETKMJVqYd2qhVnJKiFLfRn
OD/Ck6xwUuedbfe9M34wNO3Pn2Xvu1xVsQGb2dmlT345Iq9Z1nbZCGXyY9yfLnTV
fz7F2utjUjaJtuiSb52SgX7MWZ8E4nbqqKnC4SYSIlaeuL9KK3r/x6bcNLAYPcdk
qKHexDkGZQKBgF0JGyshzhiChzGYUBMBOfVk0Ru9XAq0MHDZyQdk1Io/HpRAB9Nu
cUD3cQj9a/EnU/yyDYLeFrIhztO44/7BSYL9xpRr79h9FB2zKIqb8mF9KkPnREmN
Ct6HWVdd2C9B0H/oZ+i0HafvxaHdONnpgaUY4feQlkV9iSRzknzi++lnAoGAXOuu
/X80oMpUKBFhEyaxqemREdHnJN6nC5NV+6pUHDWUimSvn6vFJH2m4BlbKUC/3V9+
uExtXBjLM8FWmTyIIz8HRttyrvfuoEHV8ctrVG29R3ISS5FTCXMrZBR+bCgemB+c
N71NPVREaUGsjIBJN+G4XvTmxR2WTt81rfhqsokCgYEA1It9e9Ut2Krzf2FaPGLG
ozlKhWadMNntthg3uxze80Rx8WSvgJQdbVpdbni2B/9xdYBIIljW/LGivYBrCSSp
aXFpXL7ZGkvl3b0MkojfghIpXVGqu+8ISDtFgL0B1gZ5hq9xMBl94fLVfQgC9Cy6
uvDHlz+fjWaWKYUPiouAtVs=
-----END PRIVATE KEY-----


@ -0,0 +1,19 @@
-----BEGIN CERTIFICATE-----
MIIDAzCCAeugAwIBAgIUc+EtilZslnS7N6MAx0u9HeP83wAwDQYJKoZIhvcNAQEL
BQAwETEPMA0GA1UEAwwGcHl0aG9uMB4XDTI0MDYwODEyNTcxM1oXDTI1MDYwODEy
NTcxM1owETEPMA0GA1UEAwwGcHl0aG9uMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8A
MIIBCgKCAQEAopFuu1FtEaZ3CYeB2vR7pmf1s4BGVkVpIJLFSSDifLdT37XLYsy1
Jc/sTFjZflaJaeHK8Zz6Z3M4rKTl1OIOuV6BRx6RfZ4Rf92A4MaOQQxDP5iCChDv
SxZCAUFQXRPOal5yFaLeTH7jYx3shLO3aETAcYpTkkfw5E/G01ru5ucnh5RefImF
TscbLBoT926OwnAHGmQ9SyQZzw3/lAas1kPWrPdN6eWxYsR8tpFc/EJHeZObciEP
7V6pYUXyyxxuAGB5gW0tOggINJxKSlHkMeJdSf8pAI0E8IR3+9Mb9c9KdvJh3K+Y
m+FL6WCvXzDOhSlnp8JagN6qys0FCUOxPwIDAQABo1MwUTAdBgNVHQ4EFgQUS74L
HD4Cdzh1ajatbvSHNQXIVvAwHwYDVR0jBBgwFoAUS74LHD4Cdzh1ajatbvSHNQXI
VvAwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAhpwtVubDjsyq
/LiTwpXKhjB/eFb6Yse782Iq+9rsiGGhsN88IA25fKgsJ2AIkR/KA7QSle3ds+1Q
EY9/vqpWnfBdpvOi7oV7ozBe+t/5JLu1GQBzg+cVa4iLAWYCiqg1d5NDdIcYMfsM
Yq2a3eQoP8Xbj3fFMXdNopXARa1d1zHB3ugXIJYinwMlS0EoGXVQVaHhemOh8GwW
keRaA6TDTBFsp0Gl4jv/NrisAt4qg+rlqr0mNcQK92vRX65mDWa/cQKwpUH8+Seq
Jl717NnsIGcqYWg8SSvVlkbFfxYhwYICXT824MAdSZtpHNCN/TegxsviYnlDyJKj
OJzn4fCxnQ==
-----END CERTIFICATE-----


@ -0,0 +1 @@
{"token_type": "Bearer", "scope": "Calendars.Read Calendars.ReadWrite User.Read profile openid email", "expires_in": 3962, "ext_expires_in": 3962, "access_token": "eyJ0eXAiOiJKV1QiLCJub25jZSI6IldDeU91YXllN1RFX2FPM0F1alhlYmtvYTdVRHpUR1dVNWt5d3lJeDZ1MGciLCJhbGciOiJSUzI1NiIsIng1dCI6InE3UDFOdnh1R1F3RE4yVGFpTW92alo4YVp3cyIsImtpZCI6InE3UDFOdnh1R1F3RE4yVGFpTW92alo4YVp3cyJ9.eyJhdWQiOiIwMDAwMDAwMy0wMDAwLTAwMDAtYzAwMC0wMDAwMDAwMDAwMDAiLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC9iYWQ3ODA0OC1hNmUwLTQ3YjEtYTI0Yi00MDNjNDQ0YWEzNDkvIiwiaWF0IjoxNzE4Mzc0NzA5LCJuYmYiOjE3MTgzNzQ3MDksImV4cCI6MTcxODM3ODk3MiwiYWNjdCI6MCwiYWNyIjoiMSIsImFpbyI6IkFWUUFxLzhYQUFBQVRnWHJ0Q1pCVjlPa1M2WldldHVVSHNMSFN0LzErYVcxT1BSSjVOWjJEL1Bzd05mY1Fxb0JTNEFZRmhLR3UvaE5TNnNWOGtLQUpmcDNNTzdqRUlNMEZrY1VaZ0IyREh4cWdOK3lUQVBUYnRVPSIsImFtciI6WyJwd2QiLCJtZmEiXSwiYXBwX2Rpc3BsYXluYW1lIjoicHl0aG9uIiwiYXBwaWQiOiJjZThjYmQyNC1mMTQ2LTRkYzctOGVlNy01MWQ5YjY5ZGVjNTkiLCJhcHBpZGFjciI6IjEiLCJmYW1pbHlfbmFtZSI6IkluY2UtSm9oYW5uc2VuIiwiZ2l2ZW5fbmFtZSI6IlNhbmd5ZSIsImlkdHlwIjoidXNlciIsImlwYWRkciI6IjY4LjIzNS40NC4yMDIiLCJuYW1lIjoiU2FuZ3llIEluY2UtSm9oYW5uc2VuIiwib2lkIjoiMWNiMWQwNDAtZmM1OS00MjMxLTllMDUtOWRjNGI0MzJjY2MxIiwicGxhdGYiOiI1IiwicHVpZCI6IjEwMDMyMDAyQTNGQjU4RjIiLCJyaCI6IjAuQVgwQVNJRFh1dUNtc1VlaVMwQThSRXFqU1FNQUFBQUFBQUFBd0FBQUFBQUFBQUMxQUk4LiIsInNjcCI6IkNhbGVuZGFycy5SZWFkIENhbGVuZGFycy5SZWFkV3JpdGUgVXNlci5SZWFkIHByb2ZpbGUgb3BlbmlkIGVtYWlsIiwic2lnbmluX3N0YXRlIjpbImttc2kiXSwic3ViIjoiV0FYVFdIR0puVFhBTjlncmIyamlEU3U4ZENOMmc0dDFacERiVHlwM1k3USIsInRlbmFudF9yZWdpb25fc2NvcGUiOiJOQSIsInRpZCI6ImJhZDc4MDQ4LWE2ZTAtNDdiMS1hMjRiLTQwM2M0NDRhYTM0OSIsInVuaXF1ZV9uYW1lIjoic2FuZ3llaWpAd2VzdGVybmxhdy5vcmciLCJ1cG4iOiJzYW5neWVpakB3ZXN0ZXJubGF3Lm9yZyIsInV0aSI6InFHcVlEODRzaDBHMFBfSEdldlVXQUEiLCJ2ZXIiOiIxLjAiLCJ3aWRzIjpbImI3OWZiZjRkLTNlZjktNDY4OS04MTQzLTc2YjE5NGU4NTUwOSJdLCJ4bXNfaWRyZWwiOiIxIDIiLCJ4bXNfc3QiOnsic3ViIjoieXhjdzFhV1FiM2VrX0FvNFRuRy11SDN6ZndGbVRRUmMxVGpFaEdqZ2p2WSJ9LCJ4bXNfdGNkdCI6MTY0ODY4MTc1Mn0.ssgIrbYo1SPNusoB9bNIB7pLxCmwBKhox__KOnwRRtnE63vbfGWAl53ww1KpNWPdDfC3p94yuPybTRqjZnTPluv1oJgGINml4AleUnZJnJttRsFHvGflzKOLtXnzmhQGUBXxu7QucKTCMH4J36neeQAWthITMwCHbaGmSy0RLotaIsoEHIufxR9ZEYD4XP5e3sFX54eSnyf4P3GgHHC1y5xxWUlemG4G1BRas8i7oX9o-gqRFube6BMtCLir_HMTNPfrCG-lhd9msLhc6e_WJSmLMHQ7RVLo-GlTMY9UouE190GzBBVKUrTg462I3kP_GayO1kt6qopBrwnF6bDUsw", "refresh_token": "0.AX0ASIDXuuCmsUeiS0A8REqjSSS9jM5G8cdNjudR2bad7Fm1AI8.AgABAwEAAAApTwJmzXqdR4BN2miheQMYAgDs_wUA9P_rTWFRiXWkWxvihyyXonsZPLrulRvnKKRlZ9PxKUltEOQsxjlg86xvCYzAS6dYeDBQiQxRAS_WEuuXVmTqUWDVwqgwQOa3BCbLwxQhPwfG-O9uFY6D239Jo8rdXTrf8XOntGs6fCn3wuo5kvJr2D-FGRA_EepltvRxZgrWdHROKuqoL_ArjLDdoFP7zM95MKhVYTmCO7LCM7u6O9ItU4_6y2_lH864zUivT1LFG8-h9sx0Ln3wd8LBP3P5GSeXwtQlkbNpj1FNDl_Ex5SwGCTM7uDHj0dn5CdUMgLkOcAC__HJdzmlEryTquoXcjd1RAmkq1MqAGD7QQreI7NQTZXwTcjoMwiBg92-bk-_o2ajeIVqzgOVBQIu1W8gkN2F7PAqRc5lGB-2mAXchqKMoL31CLUPxgTMBjWgR4waAjfZXT4h2WqXAAdGFy2nzUJAjyEQa9ZW1J5B6asCf3cVJQwI6nWIN7OphrXkGHl0ffpfrC-skVG3N2vrelAutRvyvWi4bbMqAZNglRrkTn5G_kULmnyydZBcFSc5uPmKD7OkfBD5UpTa_KLTjYexWRVsBfG9czIVxOh3ojnnza9BjrN5cHwHhzPM1t67E5iqronvT2OR_r-4BerUfRNHXrxwrLvDUEZwQ8o5IRs2N5FH0y_QN049o_NTgqytCj6wrIB4T-ZBUK2AsFej7ipdHAMYtWLZdoAo1o4nMuPBb4syN0VYd1sLUP-RQ5iv7wIkMWmNjhjIErIktZ134pGK9TlWa904H6HUin0qNTXyTmX2feE0nBlm6xJbO1ISfFkaf8aEjcAMfeu9qiArKQqUgvY", "expires_at": 1718378971}


@ -0,0 +1,121 @@
{
"sij.ai": {
"zone_id": "9c00a9e0ff540308232eb5762621d5b1",
"subdomains": {
"www.sij.ai": "8a26b17923ac3a8f21b6127cdb3d7459",
"chat.sij.ai": "f2e6a3a25f58dae627c9982efeeff50f",
"ab.sij.ai": "458c8b1c4347d3037d83880b628cf1ce",
"s3.sij.ai": "b77d74526d244271fc132e728fee4f49",
"urls.sij.ai": "6b9525aae570ac4a920ad70cae06987c",
"api.sij.ai": "8a336ee8a5b13e112d6d4ae77c149bd6",
"dt.sij.ai": "7ab8343763799df690a8584c48a4e6c3",
"temp.sij.ai": "fa5190e2818df6362c7488a92227f4de",
"dns.sij.ai": "3e7494752833ec17f051ddb02d9e3a66",
"ftp.sij.ai": "94a29faa307efee2e0b941fa4ecc5b68",
"up.sij.ai": "e6ec3556d53851f09b211b46dc5242f1",
"txt.sij.ai": "b4b0bd48ac4272b1c48eb1624072adb2",
"ollama.sij.ai": "c589b5a830ac98f6351cdaf45d5fc491",
"ai.sij.ai": "77ce76efa702b55f7fcd886e77e0b4d5",
"khoj.sij.ai": "e8727aa222b4a866aaf8876c93a55668",
"img.sij.ai": "bf17d4eda2db05463a2170ae72fdc13d",
"git.sij.ai": "cd524c00b6daf824c933a294cb52eae2"
}
},
"sij.law": {
"zone_id": "5b68d9cd99b896e26c232f03cda89d66",
"subdomains": {
"www.sij.law": "ba9afd99deeb0407ea1b74ba88eb5564",
"map.sij.law": "4de8fe05bb0e722ee2c78b2ddf553c82",
"watch.sij.law": "4678d8d445ff8c62d01c846e9b90f2b7",
"dav.sij.law": "071df85a6198803a3bc56048ce088413",
"dt.sij.law": "6d7851639114bc07cd4ad6e85aa049e3",
"files.sij.law": "0b23d35ce534f2bda8dfb24c2eef25aa",
"hook.sij.law": "f531c5f80a89b473d3605266e02ccd2d",
"net.sij.law": "0cfc569acd53d381759eed0b9b6b8ebf",
"cloud.sij.law": "2c0e4536d0eae25ec253ca34a8028bc1",
"langtool.sij.law": "6bf5d51e1902140c6cca579c0b26f749",
"temp.sij.law": "66d8b110a6bd95889afb3139ed4fd499",
"archive.sij.law": "c03421e10a344279aa53cc2e2d15296c",
"rss.sij.law": "678a3d6c6bd17e4207ec183d0858ed78",
"keys.sij.law": "4ebf14f325757cbbcbc02bffdeaaa1cb",
"imap.sij.law": "384acd03c139ffaed37f4e70c627e7d1",
"smtp.sij.law": "0677e42ea9b589d67d1da21aa00455e0"
}
},
"lone.blue": {
"zone_id": "2a86fff4c35118fce68220cfc707077f",
"subdomains": {
"ai.lone.blue": "51dbf8d11716d838f7dc57fda32e175f",
"api.lone.blue": "d4a0a25b688f3871b1e215788dd69a0b",
"cloud.lone.blue": "5036ab6d7c1ca9feb2272545afb89b44",
"jackett.lone.blue": "a9f4614ea55772b674271c6a94119780",
"lone.blue": "35003b908c5870bdd3d69416aa9af6ee",
"pi.lone.blue": "cabb41432cef273cbc5eb50d28a152f9",
"pod.lone.blue": "c2383b5781ff7972f762eb43af5f8f0f",
"router.lone.blue": "4a775be78ccbefe165e5b195c648a8a4",
"rss.lone.blue": "4a775be78ccbefe165e5b195c648a8a4",
"s3.lone.blue": "3a34ad5507b112cf4e296281796cc5eb",
"vault.lone.blue": "b275c72f546f74b9264753d1952df092",
"whale.lone.blue": "267d2e23dcf46edef0a1e9bb7a7db9bc",
"ab.lone.blue": "85c3155bbd078971c4d2f7cca41ad510",
"dns.lone.blue": "e01b1bfa5696452b827fc5220b77fda8",
"chat.lone.blue": "c0a141ee64fb8bef5efc9c2556979e99",
"vector.lone.blue": "544082d1051a2a184112ef1f8c9ba389",
"jump.lone.blue": "67b523a20609337e44a643763cb86e9e",
"up.lone.blue": "79821180b99598660cebca6f1c2b0471",
"cdb.lone.blue": "8ce4d70a7ec1e1b9d625d890e2d7e9bb",
"fap.lone.blue": "6fff3cef2a7331fb718cc240c6217ed8"
}
},
"lone.earth": {
"zone_id": "abc8f28cfe88ebdfbf568d9ebf520e99",
"subdomains": {
"lone.earth": "2cf41011a69dc5ad8f2c9f73e1da51d0"
}
},
"subtle.host": {
"zone_id": "3b4bb8899f232b848ec4d1d78d81cb30",
"subdomains": {
"subtle.host": "3b4bb8899f232b848ec4d1d78d81cb30",
"code.subtle.host": "94046b2e9d3e2f1c28f83fbf5e64c1b6",
"dl.subtle.host": "465ea87e6a1d0a773b6142979fffccfb",
"llm.subtle.host": "59ba6a0d1990992a5539100d22fc6463",
"media.subtle.host": "c183b47a598253e66dcbf3250064bffe",
"qbt.subtle.host": "a465ac7849e2de8ef17267272178dca0",
"sync.subtle.host": "8449a402481913d5068ebf8eebdac079",
"vw.subtle.host": "5beb827873ab39467204c9becae3a929",
"zabbix.subtle.host": "9db9169f6099f54ee9ae6d4fd988d985",
"dns.subtle.host": "9e046ebc14ebcd082b0d87c86d6dd502",
"fileserver.subtle.host": "8ade682a91ad04da227aaf5af2fdcad8",
"st.subtle.host": "71aad71cfc05f8512366c1e5129baa8a",
"fap.subtle.host": "6e10970d8b962cb84b6ee951bf54730a",
"home.subtle.host": "78320db057c9a7b87586192203e2cdc1",
"jackett.subtle.host": "a4306475d9e8d4257cd7e8b113bf910c"
}
},
"env.esq": {
"zone_id": "faf889fd7c227c2e61875b2e70b5c6fe",
"subdomains": {
"api.env.esq": "b6da6ae8cdd376c1a0742a8b540d53df",
"cloud.env.esq": "cd394e73af0af91e4ddba9fe5b5f4db9",
"dav.env.esq": "e42852bf0f417b7eca727b87eb235568",
"dt.env.esq": "afbc205e829cfb8d3f79dab187c06f99",
"env.esq": "b9b636ce9bd4812a6564f572f0f373ee",
"minio.env.esq": "86a087ec53a98a06541589ef3720cfea",
"n8n.env.esq": "37850b2ba507ddceaab1e00050ae8155",
"nas.env.esq": "6ab124507384bb648cc33c06184e758b",
"pi.env.esq": "fbdf93acaf7e1a384c4f970e5ffb5a22",
"router.env.esq": "f2b9af49ea7b2843e3803bd2f0026aba",
"rss.env.esq": "f043d5cf485f4e53f9cbcb85fed2c861",
"s3.env.esq": "a5fa431a4be8f50af2c118aed353b0ec",
"dns.env.esq": "e10fbba777c90775a87aad47d342a0c1",
"sij.env.esq": "9ee66e66a516e21414d871e81f920a27",
"monitor.env.esq": "e03a7cd101805dec6b9d44203b31f27a",
"kopia.env.esq": "165b065140d314f0a9b34147d4730439",
"ftp.env.esq": "dd9dac2ff374f36de2113c291b709e4b",
"map.env.esq": "56142c1b040e8f2f05691b75d5b94b16",
"hook.env.esq": "6a7c14ef6394d23ee1a3db9de8b831ad",
"testing.env.esq": "103ada77c3d8d199ccf2622c63f5172a"
}
}
}
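
These zone and record IDs pair with the CF block earlier in this config. As a hedged sketch of how one record could be updated -- the endpoint shape follows Cloudflare's documented `PUT /zones/{zone_id}/dns_records/{record_id}`; the token and IP address here are illustrative, not real values:

```python
import httpx

CF_URL = "https://api.cloudflare.com/client/v4"
token = "YOUR_CLOUDFLARE_TOKEN"   # corresponds to {{ SECRET.CLOUDFLARE }} above

zone_id = "9c00a9e0ff540308232eb5762621d5b1"    # sij.ai
record_id = "8a336ee8a5b13e112d6d4ae77c149bd6"  # api.sij.ai

r = httpx.put(
    f"{CF_URL}/zones/{zone_id}/dns_records/{record_id}",
    headers={"Authorization": f"Bearer {token}"},
    json={"type": "A", "name": "api.sij.ai", "content": "203.0.113.10",
          "ttl": 1, "proxied": True},  # ttl=1 means "automatic" in the CF API
)
print(r.json()["success"])
```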

98
sijapi/config/config.py Normal file

@ -0,0 +1,98 @@
import os
import yaml
from time import sleep
from pathlib import Path
import ipaddress

class Config:
    def __init__(self, yaml_file):
        with open(yaml_file, 'r') as file:
            self.data = yaml.safe_load(file)

    def __getattr__(self, name):
        if name in self.data:
            value = self.data[name]
            if isinstance(value, dict):
                return ConfigSection(value)
            return value
        raise AttributeError(f"Config has no attribute '{name}'")

class ConfigSection:
    def __init__(self, data):
        self.data = data

    def __getattr__(self, name):
        if name in self.data:
            value = self.data[name]
            if isinstance(value, dict):
                return ConfigSection(value)
            return value
        raise AttributeError(f"ConfigSection has no attribute '{name}'")

    def __setattr__(self, name, value):
        if name == 'data':
            super().__setattr__(name, value)
        else:
            self.data[name] = value

# Load the YAML configuration file
CFG = Config('.config.yaml')

# Access existing attributes
print(CFG.API.PORT)  # e.g. 4444

def load_config():
    yaml_file = os.path.join(os.path.dirname(__file__), ".config.yaml")
    HOME_DIR = Path.home()
    BASE_DIR = Path(__file__).resolve().parent.parent
    CONFIG_DIR = BASE_DIR / "config"
    ROUTER_DIR = BASE_DIR / "routers"
    DATA_DIR = BASE_DIR / "data"
    os.makedirs(DATA_DIR, exist_ok=True)
    ALERTS_DIR = DATA_DIR / "alerts"
    os.makedirs(ALERTS_DIR, exist_ok=True)
    LOGS_DIR = BASE_DIR / "logs"
    os.makedirs(LOGS_DIR, exist_ok=True)
    REQUESTS_DIR = LOGS_DIR / "requests"
    os.makedirs(REQUESTS_DIR, exist_ok=True)
    REQUESTS_LOG_PATH = LOGS_DIR / "requests.log"
    DOC_DIR = DATA_DIR / "docs"
    os.makedirs(DOC_DIR, exist_ok=True)
    SD_IMAGE_DIR = DATA_DIR / "sd" / "images"
    os.makedirs(SD_IMAGE_DIR, exist_ok=True)
    SD_WORKFLOWS_DIR = DATA_DIR / "sd" / "workflows"
    try:
        # Config expects a path to the YAML file, not pre-parsed data
        config = Config(yaml_file)
        return config
    except Exception as e:
        print(f"Error while loading configuration: {e}")
        return None

def reload_config():
    while True:
        global config
        config = Config('.config.yaml')  # same file the initial load uses
        sleep(300)  # reload every 5 minutes
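
For orientation, a brief usage sketch of the Config/ConfigSection pair above, assuming .config.yaml parses to nested mappings like the TTS block earlier in this commit:

```python
cfg = Config('.config.yaml')

tts = cfg.TTS                 # nested mappings come back wrapped in ConfigSection
print(tts.USE.DEFAULT)        # e.g. 'XTTS'

tts.USE.DEFAULT = '11L'       # __setattr__ writes through to the shared parsed dict
print(cfg.TTS.USE.DEFAULT)    # '11L' -- both access paths see the same data
```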


@ -0,0 +1,151 @@
{
"Alpaca": {
"models": [
"mythomax",
"openhermes",
"deepseek"
],
"prefix": "\n### Instruction:\n",
"stops": [
"### Instruction"
],
"suffix": "\n### Response:\n",
"sysPrefix": "### System\n",
"sysSuffix": "\n"
},
"Amazon": {
"models": [
"mistrallite"
],
"prefix": "<|prompter|>",
"stops": [
"<|prompter|>",
"</s>"
],
"suffix": "</s><|assistant|>",
"sysPrefix": "",
"sysSuffix": ""
},
"ChatML": {
"models": [
"dolphin",
"capybara",
"nous-hermes-2"
],
"prefix": "<|im_end|>\n<|im_start|>user\n",
"stops": [
"<|im_end|>",
"<|im_start|>"
],
"suffix": "<|im_end|>\n<|im_start|>assistant\n",
"sysPrefix": "<|im_start|>system\n",
"sysSuffix": "<|im_end|>"
},
"Llama2": {
"models": [
"llama2-placeholder"
],
"prefix": "\n\n[INST] ",
"stops": [
"[/INST]",
"[INST]"
],
"suffix": "[/INST]\n\n",
"sysPrefix": "",
"sysSuffix": "\n\n"
},
"Mistral": {
"models": [
"mistral-instruct",
"mixtral-8x7b-instruct"
],
"prefix": "\n[INST] ",
"stops": [
"[/INST]",
"[INST]",
"</s>"
],
"suffix": "[/INST]\n",
"sysPrefix": "",
"sysSuffix": "\n<s>"
},
"Orca": {
"models": [
"upstage",
"neural",
"solar",
"SOLAR"
],
"prefix": "\n### User:\n",
"stops": [
"###",
"User:"
],
"suffix": "\n### Assistant:\n",
"sysPrefix": "### System:\n",
"sysSuffix": "\n"
},
"Phi2": {
"models": [
"phi-2"
],
"prefix": "\nSangye: ",
"stops": [
"###",
"User Message"
],
"suffix": "\nAssistant: ",
"sysPrefix": "Systen: ",
"sysSuffix": "\n"
},
"Phind": {
"models": [
"phind"
],
"prefix": "\n### User Message\n",
"stops": [
"###",
"User Message"
],
"suffix": "\n### Assistant\n",
"sysPrefix": "### System Prompt\n",
"sysSuffix": "\n"
},
"Vicuna": {
"models": [
"xwin",
"synthia",
"tess"
],
"prefix": "\nUSER: ",
"stops": [
"</s>",
"USER:",
"SYSTEM:"
],
"suffix": "</s>\nASSISTANT: ",
"sysPrefix": "SYSTEM: ",
"sysSuffix": "\n"
},
"Zephyr": {
"models": [
"zephyr"
],
"prefix": " ",
"stops": [
"</s>"
],
"suffix": "</s>\n ",
"sysPrefix": " ",
"sysSuffix": "</s>\n"
},
"default": {
"prefix": "\n### Instruction:\n",
"stops": [
"### Instruction"
],
"suffix": "\n### Response:\n",
"sysPrefix": "### System\n",
"sysSuffix": "\n"
}
}
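
A small sketch of how these template fields appear intended to compose into a final prompt: sysPrefix/sysSuffix wrap the system message, prefix/suffix wrap the user turn. This is inferred from the shapes above, not from documented sijapi code, and the filename is hypothetical:

```python
import json

def build_prompt(template: dict, system: str, user: str) -> str:
    """Assemble a model-specific prompt from one template entry."""
    return (template["sysPrefix"] + system + template["sysSuffix"]
            + template["prefix"] + user + template["suffix"])

with open('prompt_templates.json') as f:   # hypothetical filename for this table
    templates = json.load(f)

print(build_prompt(templates["ChatML"],
                   "You are a helpful assistant.",
                   "Summarize the docket."))
```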


@ -0,0 +1,43 @@
{
"scenes": [
{
"scene": "default",
"triggers": [""],
"API_PPrompt": "(Highly-detailed) image of ",
"API_SPrompt": "; ((masterpiece)); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
"API_NPrompt": "`oil, paint splash, oil effect, dots, paint, freckles, liquid effect, canvas frame, 3d, bad art, asian, illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, explicit, topless`",
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this scene description to its essence, staying true to what it describes: ",
"workflows": [{"workflow": "turbo.json", "size": "1024x768"}]
},
{
"scene": "portrait",
"triggers": [
"portrait",
"profile",
"headshot"
],
"API_PPrompt": "Highly-detailed portrait photo of ",
"API_SPrompt": "; attractive, cute, (((masterpiece))); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
"API_NPrompt": "canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, nude",
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic portrait photos. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided, focusing in particular on the pictured individual's eyes, pose, and other distinctive features. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the rest of the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words. Remember that the final product will be a still image, and action verbs are not as helpful as simple descriptions of position, appearance, background, etc.",
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this portrait photo to its essence: ",
"workflows": [
{
"workflow": "selfie.json",
"size": "768x1024"
}
]
},
{
"scene": "wallpaper",
"triggers": ["wallpaper"],
"API_PPrompt": "Stunning widescreen image of ",
"API_SPrompt": ", masterpiece, (subtle:0.7), (nuanced:0.6), best quality, ultra detailed, ultra high resolution, 8k, (documentary:0.3), cinematic, filmic, moody, dynamic lighting, realistic, wallpaper, landscape photography, professional, earthporn, (eliot porter:0.6), (frans lanting:0.4), (daniel kordan:0.6), landscapephotography, ultra detailed, earth tones, moody",
"API_NPrompt": "FastNegativeV2, (easynegative:0.5), canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, Photoshop, video game, anime, cartoon, fake, tiling, out of frame, bad art, bad anatomy, 3d render, nsfw, worst quality, low quality, text, watermark, (Thomas Kinkade:0.5), sentimental, kitsch, kitschy, twee, commercial, holiday card, modern, futuristic, urban, comic, cartoon, FastNegativeV2, epiCNegative, easynegative, verybadimagenegative_v1.3",
"LLM_SysMsg": "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
"LLM_PrePrompt": "Using a series of words or sentence fragments separated by commas, describe a professional landscape photograph of a striking scene of nature. You can select any place on Earth that a young model from the Pacific Northwest is likely to travel to. Focus on describing the content and composition of the image. Only use words and phrases that are visually descriptive. This model is especially fond of wild and rugged places, mountains. She favors dark muted earth tones, dramatic lighting, and interesting juxtapositions between foreground and background, or center of frame and outer frame areas. Avoid cliche situations; instread strive for nuance and originality in composition and environment.",
"workflows": [{"workflow": "landscape.json", "size": "1160x768"}]
}
]
}
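
Judging from the triggers arrays, scene selection is keyword-based. A hedged sketch of how that matching could work (the default scene's empty-string trigger is skipped so it applies only as the fallback):

```python
def select_scene(scenes: list[dict], prompt: str) -> dict:
    """Return the first scene with a trigger word in the prompt, else the default scene."""
    lowered = prompt.lower()
    for scene in scenes:
        if any(t and t in lowered for t in scene["triggers"]):
            return scene
    return next(s for s in scenes if s["scene"] == "default")

# select_scene(config["scenes"], "headshot of a welder")["workflows"]
# -> [{"workflow": "selfie.json", "size": "768x1024"}]
```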

102
sijapi/config/sd.json Normal file

@ -0,0 +1,102 @@
{
"scenes": [
{
"scene": "default",
"triggers": [""],
"API_PPrompt": "(Highly-detailed) image of ",
"API_SPrompt": "; ((masterpiece)); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
"API_NPrompt": "`oil, paint splash, oil effect, dots, paint, freckles, liquid effect, canvas frame, 3d, bad art, asian, illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, explicit, topless`",
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this scene description to its essence, staying true to what it describes: ",
"workflows": [{"workflow": "turbo.json", "size": "1024x768"}]
},
{
"scene": "landscape",
"triggers": ["lanscape", "vista", "scenic", "pastoral", "mountains", "outdoor", "outside", "adventure"],
"API_PPrompt": "Moody landscape photograph of ",
"API_SPrompt": ", masterpiece, (cinematic:0.5), beautiful lighting, dynamic lighting, (subtle:0.4), (nuanced:0.3), subdued, fine detail, best quality, ultra detailed, ultra high resolution, 8k, (documentary:0.3), cinematic, filmic, moody, dynamic lighting, realistic, wallpaper, landscape photography, professional, pastoral, earthporn, (eliot porter:0.6), (frans lanting:0.4), (daniel kordan:0.6), landscapephotography, ultra detailed, extremely sharp, insane detail, 8k, earth tones, moody",
"API_NPrompt": "FastNegativeV2, (easynegative:0.5), canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, Photoshop, video game, anime, cartoon, fake, tiling, out of frame, bad art, bad anatomy, 3d render, nsfw, worst quality, low quality, text, watermark, (Thomas Kinkade:0.5), sentimental, kitsch, kitschy, twee, commercial, holiday card, modern, futuristic, urban, comic, cartoon, FastNegativeV2, epiCNegative, easynegative, verybadimagenegative_v1.3",
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
"LLM_PrePrompt": "Using a series of words or sentence fragments separated by commas, describe a professional landscape photograph of a striking scene of nature. You can select any place on Earth that a young model from the Pacific Northwest is likely to travel to. Focus on describing the content and composition of the image. Only use words and phrases that are visually descriptive. This model is especially fond of wild and rugged places, mountains. She favors dark muted earth tones, dramatic lighting, and interesting juxtapositions between foreground and background, or center of frame and outer frame areas. Avoid cliche situations; instread strive for nuance and originality in composition and environment.",
"workflows": [{"workflow": "landscape.json", "size": "1080x800"}]
},
{
"scene": "wallpaper",
"triggers": ["landscape", "vista", "scenic", "pastoral", "mountains", "outdoor", "outside", "adventure"],
"API_PPrompt": "",
"API_SPrompt": ", masterpiece, cinematic, beautiful lighting, subtle, nuanced, fine detail, best quality, filmic, moody, dynamic lighting, realistic, wallpaper, landscape photography, professional, pastoral, earthporn, landscapephotography, ultra detailed, extremely sharp, insane detail, 8k, earth tones, moody",
"API_NPrompt": "FastNegativeV2, easynegative, canvas frame, 3d, bad art, illustrated, deformed, blurry, duplicate, video game, render, anime, cartoon, fake, tiling, out of frame, bad art, bad anatomy, 3d render, nsfw, worst quality, low quality, text, watermark, Thomas Kinkade, sentimental, kitsch, kitschy, twee, commercial, holiday card, comic, cartoon, FastNegativeV2, epiCNegative, easynegative, verybadimagenegative_v1.3",
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the prompt. Always use the most visuallnsive_wilsony descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
"LLM_PrePrompt": "Using a series of words or sentence fragments separated by commas, describe a professional landscape photograph of a striking scene of nature. Focus on describing the content and composition of the image. Only use words and phrases that are visually descriptive. Favors dark tones, dramatic lighting, and interesting juxtapositions between foreground and background, or center of frame and outer frame areas. Avoid cliche situations; instead strive for nuance and originality in composition and environment.",
"workflows": [{"workflow": "wallpaper.json", "size": "1024x512"}]
},
{
"scene": "portrait",
"triggers": [
"portrait",
"profile",
"headshot"
],
"API_PPrompt": "Highly-detailed portrait photo of ",
"API_SPrompt": "; attractive, cute, (((masterpiece))); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
"API_NPrompt": "canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, nude",
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic portrait photos. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided, focusing in particular on the pictured individual's eyes, pose, and other distinctive features. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the rest of the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words. Remember that the final product will be a still image, and action verbs are not as helpful as simple descriptions of position, appearance, background, etc.",
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this portrait photo to its essence: ",
"workflows": [
{
"workflow": "selfie.json",
"size": "768x1024"
}
]
},
{
"scene": "doggystyle",
"triggers": [
"doggystyle",
"doggy-style",
"doggy style",
"from behind"
],
"API_PPrompt": "Explicit highly-detailed image of ",
"API_SPrompt": "; ((from behind)), (((doggystyle))), explicit, ((tiny breasts)), flat chest, (((young nude girl))), cute, (covered in cum), sex, porn, nsfw, (((masterpiece))); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
"API_NPrompt": "canvas frame, 3d, ((bad art)), ((asian)), illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, censored, pg13",
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic erotic/pornographic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided, focusing in particular on 18+ details concerning body parts, position, etc. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the rest of the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words. Remember that the final product will be a still image, and action verbs are not as helpful as simple descriptions of position, appearance, body parts and fluids, etc.",
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this pornographic scene description of doggystyle sex to its erotic essence, staying true to what it describes no matter how kinky or taboo: ",
"workflows": [
{
"workflow": "xxx/doggy.json",
"size": "1024x768",
"preset_values": {
"node": "133",
"type": "depth_map",
"key": "image",
"values": [
"xxx/doggy/DOGGY_001.png",
"xxx/doggy/DOGGY_002.png",
"xxx/doggy/DOGGY_003.png"
]
}
}
]
},
{
"scene": "nsfw",
"triggers": [
"nude",
"naked",
"undressed"
],
"API_PPrompt": "Explicit highly-detailed image of ",
"API_SPrompt": "; ((tiny breasts)), flat chest, (((young nude girl))), cute, nsfw, (((masterpiece))); ((beautiful lighting), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
"API_NPrompt": "canvas frame, 3d, ((bad art)), ((asian)), illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, censored, pg13",
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic erotic art. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided, focusing in particular on details concerning body parts, position, etc. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the rest of the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts.",
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this image of a young girl or woman to its erotic essence: ",
"workflows": [
{
"workflow": "nude.json",
"size": "768x1024"
}
]
}
]
}

Binary file not shown.


@ -0,0 +1,2 @@
#!/bin/bash
osascript /Users/sij/workshop/sijapi/helpers/updateCal.scpt

Binary file not shown.

Binary file not shown.


@ -0,0 +1,195 @@
from fastapi import FastAPI, Request, BackgroundTasks, HTTPException, status
from fastapi.responses import JSONResponse
import httpx
import json
import logging
from pathlib import Path
import asyncio
from datetime import datetime
import os, io
from PyPDF2 import PdfReader
import aiohttp

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

hook = FastAPI()
# /Users/sij/Library/CloudStorage/OneDrive-WELC/Documents - WELC-Docket
SYNC_FOLDER = Path(__file__).resolve().parent.parent
HOME_FOLDER = Path.home()
DOCKETS_FOLDER = HOME_FOLDER / "Dockets"
SEARCH_FOLDER = HOME_FOLDER / "Watched Cases"
SCRIPTS_FOLDER = SYNC_FOLDER / ".scripts"
REQUESTS_FOLDER = HOME_FOLDER / "sync" / "requests"
COURTLISTENER_BASE_URL = "https://www.courtlistener.com"
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
COURTLISTENER_API_KEY = "efb5fe00f3c6c88d65a32541260945befdf53a7e"
with open(SCRIPTS_FOLDER / 'caseTable.json', 'r') as file:
CASE_TABLE = json.load(file)
@hook.get("/health")
async def health():
return {"status": "ok"}
@hook.post("/cl/docket")
async def respond(request: Request, background_tasks: BackgroundTasks):
client_ip = request.client.host
logging.info(f"Received request from IP: {client_ip}")
data = await request.json()
payload = data['payload']
results = data['payload']['results']
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = REQUESTS_FOLDER / f"{timestamp}-{client_ip}_docket.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
background_tasks.add_task(process_docket, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
async def process_docket(result):
async with httpx.AsyncClient() as session:
await process_docket_result(result, session)
async def process_docket_result(result, session):
docket = str(result.get('docket'))
case_code, case_shortname = get_case_details(docket)
date_filed = result.get('date_filed', 'No Date Filed')
try:
date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d')
except ValueError:
date_filed_formatted = 'NoDateFiled'
# Fetching court docket information from the API
url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status == 200:
logging.info(f"Fetching CourtListener docket information for {docket}...")
data = await response.json()
court_docket = data['results'][0]['docket_number_core']
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
case_name = data['results'][0]['case_name']
logging.info(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
else:
logging.info("Failed to fetch data from CourtListener API.")
court_docket = 'NoCourtDocket'
case_name = 'NoCaseName'
for document in result.get('recap_documents', []):
filepath_ia = document.get('filepath_ia')
filepath_local = document.get('filepath_local')
if filepath_ia:
file_url = filepath_ia
logging.info(f"Found IA file at {file_url}.")
elif filepath_local:
file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
logging.info(f"Found local file at {file_url}.")
else:
logging.info(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
continue
document_number = document.get('document_number', 'NoDocumentNumber')
description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_")
description = description[:50] # Truncate description
# case_shortname = case_name # TEMPORARY OVERRIDE
file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
target_path = Path(DOCKETS_FOLDER) / case_shortname / "Docket" / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await download_file(file_url, target_path, session)
logging.info(f"Downloaded {file_name} to {target_path}")
def get_case_details(docket):
case_info = CASE_TABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"})
case_code = case_info.get("code")
short_name = case_info.get("shortname")
return case_code, short_name
async def download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
    }
    # Note: a fresh session is opened per download; the session argument callers pass in is shadowed here.
    async with aiohttp.ClientSession() as session:
logging.info(f"Attempting to download {url} to {path}.")
try:
async with session.get(url, headers=headers, allow_redirects=True) as response:
if response.status == 403:
logging.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
return
response.raise_for_status()
# Check if the response content type is a PDF
content_type = response.headers.get('Content-Type')
if content_type != 'application/pdf':
logging.error(f"Invalid content type: {content_type}. Skipping download.")
return
# Create an in-memory buffer to store the downloaded content
buffer = io.BytesIO()
async for chunk in response.content.iter_chunked(1024):
buffer.write(chunk)
# Reset the buffer position to the beginning
buffer.seek(0)
# Validate the downloaded PDF content
try:
PdfReader(buffer)
except Exception as e:
logging.error(f"Invalid PDF content: {str(e)}. Skipping download.")
return
# If the PDF is valid, write the content to the file on disk
path.parent.mkdir(parents=True, exist_ok=True)
with path.open('wb') as file:
file.write(buffer.getvalue())
except Exception as e:
logging.error(f"Error downloading file: {str(e)}")
@hook.post("/cl/search")
async def respond_search(request: Request, background_tasks: BackgroundTasks):
client_ip = request.client.host
logging.info(f"Received request from IP: {client_ip}")
data = await request.json()
payload = data['payload']
results = data['payload']['results']
# Save the payload data
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = REQUESTS_FOLDER / f"{timestamp}-{client_ip}_search.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
background_tasks.add_task(process_search_result, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
async def process_search_result(result):
async with httpx.AsyncClient() as session:
download_url = result.get('download_url')
court_id = result.get('court_id')
case_name_short = result.get('caseNameShort')
case_name = result.get('caseName')
logging.info(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")
court_folder = court_id
if case_name_short:
case_folder = case_name_short
else:
case_folder = case_name
file_name = download_url.split('/')[-1]
target_path = Path(SEARCH_FOLDER) / court_folder / case_folder / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await download_file(download_url, target_path, session)
logging.info(f"Downloaded {file_name} to {target_path}")


@ -0,0 +1,120 @@
import asyncio
import json
import logging
import os
import re
import shutil
import time
import zipfile
from datetime import datetime
from pathlib import Path

import aiohttp
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
COURTLISTENER_API_KEY = "efb5fe00f3c6c88d65a32541260945befdf53a7e"
SYNC_FOLDER = Path(__file__).resolve().parent.parent
DOCKETS_FOLDER = SYNC_FOLDER / "Documents - WELC-Docket"
SELENIUM_HOST = "http://10.13.37.11:4646"
selenium_remote_url = f"{SELENIUM_HOST}/wd/hub"
with open('caseTable.json') as f:
CASE_TABLE = json.load(f)
def process_docket_result(docket_id):
case_code = CASE_TABLE[docket_id].get("code")
case_shortname = CASE_TABLE[docket_id].get("shortname")
case_court = CASE_TABLE[docket_id].get("court") # docket_info.get("court")
# Fetching court docket information from the API
url = f"{COURTLISTENER_DOCKETS_URL}?id={docket_id}"
headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
response = requests.get(url, headers=headers)
if response.status_code == 200:
logging.info(f"Fetching CourtListener docket information for {docket_id}...")
data = response.json()
absolute_url = data['results'][0]['absolute_url']
court_docket = data['results'][0]['docket_number_core']
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
date_filed = data['results'][0]['date_filed']
pacer_case_id = data['results'][0]['pacer_case_id']
logging.info(f"Obtained from CourtListener: docket {court_docket}, date filed {date_filed}.")
else:
logging.info("Failed to fetch data from CourtListener API.")
return
download_url_to_try = f"https://archive.org/compress/gov.uscourts.{case_court}.{pacer_case_id}/formats=TEXT%20PDF&file=/gov.uscourts.{case_court}.{pacer_case_id}.zip"
print(f"\n{download_url_to_try}\n")
directory_path = os.path.join(DOCKETS_FOLDER, case_shortname, "Docket")
# Create the directory if it doesn't exist
os.makedirs(directory_path, exist_ok=True)
target_path = os.path.join(directory_path, case_code + "_Archive.zip")
if os.path.exists(target_path):
logging.info(f"Using existing archive: {target_path}")
else:
response = requests.get(download_url_to_try)
if response.status_code == 200:
with open(target_path, 'wb') as f:
f.write(response.content)
logging.info(f"Downloaded archive: {target_path}")
time.sleep(3)
else:
logging.info(f"Failed to download archive from {download_url_to_try}")
return
# Extract the contents of the ZIP file
with zipfile.ZipFile(target_path, 'r') as zip_ref:
for member in zip_ref.infolist():
filename = os.path.basename(member.filename)
if filename.endswith('.pdf'):
# Extract the docket entry from the filename
match = re.search(r'(\d+)\.(\d+)\.pdf$', filename)
if match:
docket_entry_num = match.group(1)
docket_entry_dec = match.group(2)
if docket_entry_dec == '0':
docket_entry = docket_entry_num
else:
docket_entry = f"{docket_entry_num}.{docket_entry_dec}"
# Construct the new filename
new_filename = f"{case_code}_{docket_entry}.pdf"
target_file_path = os.path.join(directory_path, new_filename)
# Extract the file with the new filename
with open(target_file_path, 'wb') as target_file:
with zip_ref.open(member) as source:
shutil.copyfileobj(source, target_file)
logging.info(f"Extracted {filename} as {new_filename}")
def main():
docket_ids = list(CASE_TABLE.keys())
for docket_id in docket_ids:
process_docket_result(docket_id)
time.sleep(3)
if __name__ == '__main__':
main()
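
The script assumes a caseTable.json keyed by CourtListener docket ID. Its implied shape, reconstructed from the .get() lookups above (the key and values here are illustrative):

```python
CASE_TABLE_EXAMPLE = {
    "66427553": {
        "code": "001",                    # filename prefix for extracted PDFs
        "shortname": "Example_v_Agency",  # folder name under DOCKETS_FOLDER
        "court": "ord",                   # slots into gov.uscourts.<court>.<pacer_case_id>
    }
}
```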


@ -0,0 +1,153 @@
import asyncio
import json
import logging
import os
import re
import shutil
import time
import zipfile
from datetime import datetime
from pathlib import Path

import aiohttp
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from PyPDF2 import PdfReader
from dotenv import load_dotenv
load_dotenv()
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
COURTLISTENER_DOCKETS_URL = os.getenv("COURTLISTENER_DOCKETS_URL")
COURTLISTENER_API_KEY = os.getenv("COURTLISTENER_API_KEY")
SYNC_FOLDER = Path(__file__).resolve().parent.parent
DOCKETS_FOLDER = os.getenv("DOCKETS_FOLDER")
SELENIUM_HOST=os.getenv("SELENIUM_HOST")
selenium_remote_url = f"{SELENIUM_HOST}/wd/hub"
with open('caseTable.json') as f:
CASE_TABLE = json.load(f)
def extract_date_from_pdf(pdf_path):
with open(pdf_path, 'rb') as file:
reader = PdfReader(file)
page = reader.pages[0]
text = page.extract_text()
lines = text.split('\n')
for line in lines[:2]:
match = re.search(r'\b(\d{1,2})-(\d{1,2})-(\d{2})\b', line)
if match:
month, day, year = match.groups()
if len(year) == 2:
if int(year) > 24:
year = '19' + year
else:
year = '20' + year
date_str = f"{year}-{month.zfill(2)}-{day.zfill(2)}"
file_date = datetime.strptime(date_str, "%Y-%m-%d")
if file_date >= datetime(1924, 5, 1) and file_date <= datetime(2024, 4, 30):
return file_date.strftime("%Y%m%d")
return None
def process_docket_result(docket_id):
case_code = CASE_TABLE[docket_id].get("code")
case_shortname = CASE_TABLE[docket_id].get("shortname")
case_court = CASE_TABLE[docket_id].get("court") # docket_info.get("court")
# Fetching court docket information from the API
url = f"{COURTLISTENER_DOCKETS_URL}?id={docket_id}"
headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
response = requests.get(url, headers=headers)
if response.status_code == 200:
logging.info(f"Fetching CourtListener docket information for {docket_id}...")
data = response.json()
absolute_url = data['results'][0]['absolute_url']
court_docket = data['results'][0]['docket_number_core']
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
date_filed = data['results'][0]['date_filed']
pacer_case_id = data['results'][0]['pacer_case_id']
logging.info(f"Obtained from CourtListener: docket {court_docket}, date filed {date_filed}.")
else:
logging.info("Failed to fetch data from CourtListener API.")
return
download_url_to_try = f"https://archive.org/compress/gov.uscourts.{case_court}.{pacer_case_id}/formats=TEXT%20PDF&file=/gov.uscourts.{case_court}.{pacer_case_id}.zip"
print(f"\n{download_url_to_try}\n")
directory_path = os.path.join(DOCKETS_FOLDER, case_shortname, "Docket")
# Create the directory if it doesn't exist
os.makedirs(directory_path, exist_ok=True)
target_path = os.path.join(directory_path, case_code + "_Archive.zip")
if os.path.exists(target_path):
logging.info(f"Using existing archive: {target_path}")
else:
response = requests.get(download_url_to_try)
if response.status_code == 200:
with open(target_path, 'wb') as f:
f.write(response.content)
logging.info(f"Downloaded archive: {target_path}")
time.sleep(3)
else:
logging.info(f"Failed to download archive from {download_url_to_try}")
return
# Extract the contents of the ZIP file
with zipfile.ZipFile(target_path, 'r') as zip_ref:
for member in zip_ref.infolist():
filename = os.path.basename(member.filename)
if filename.endswith('.pdf'):
# Extract the docket entry from the filename
match = re.search(r'(\d+)\.(\d+)\.pdf$', filename)
if match:
docket_entry_num = match.group(1)
docket_entry_dec = match.group(2)
if docket_entry_dec == '0':
docket_entry = docket_entry_num
else:
docket_entry = f"{docket_entry_num}.{docket_entry_dec}"
# Extract the date from the first two lines of the PDF
with zip_ref.open(member) as source:
temp_file_path = os.path.join(directory_path, 'temp.pdf')
with open(temp_file_path, 'wb') as temp_file:
shutil.copyfileobj(source, temp_file)
pdf_date = extract_date_from_pdf(temp_file_path)
os.remove(temp_file_path)
# Construct the new filename
if pdf_date:
new_filename = f"{case_code}_{docket_entry}_{pdf_date}.pdf"
else:
new_filename = f"{case_code}_{docket_entry}.pdf"
target_file_path = os.path.join(directory_path, new_filename)
# Extract the file with the new filename
with open(target_file_path, 'wb') as target_file:
with zip_ref.open(member) as source:
shutil.copyfileobj(source, target_file)
logging.info(f"Extracted {filename} as {new_filename}")
def main():
docket_ids = list(CASE_TABLE.keys())
for docket_id in docket_ids:
process_docket_result(docket_id)
time.sleep(3)
if __name__ == '__main__':
main()
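
Worked examples of the two-digit-year pivot and date window in extract_date_from_pdf above:

```python
# '3-15-99' -> int('99') > 24  -> '1999' -> 1999-03-15, inside the window -> '19990315'
# '4-30-24' -> int('24') <= 24 -> '2024' -> 2024-04-30, the window's last day -> '20240430'
# '5-01-24' -> int('24') <= 24 -> '2024' -> 2024-05-01, outside 1924-05-01..2024-04-30 -> None
```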


@ -0,0 +1,32 @@
import json
import requests
# Load the caseTable.json file
with open('caseTable.json', 'r') as file:
case_table = json.load(file)
# Set the base URL and authorization token
base_url = "https://www.courtlistener.com/api/rest/v3/docket-alerts/"
auth_token = "a90d3f2de489aa4138a32133ca8bfec9d85fecfa"
# Iterate through each key (docket ID) in the case table
for docket_id in case_table.keys():
# Set the data payload and headers for the request
data = {'docket': docket_id}
headers = {'Authorization': f'Token {auth_token}'}
try:
# Send the POST request to the CourtListener API
response = requests.post(base_url, data=data, headers=headers)
        # Check the response status code (REST create endpoints typically return 201, so accept both)
        if response.status_code in (200, 201):
            print(f"Successfully created docket alert for docket ID: {docket_id}")
        else:
            print(f"Failed to create docket alert for docket ID: {docket_id}")
            print(f"Status code: {response.status_code}")
            print(f"Response content: {response.content}")
except requests.exceptions.RequestException as e:
print(f"Error occurred while creating docket alert for docket ID: {docket_id}")
print(f"Error message: {str(e)}")


@ -0,0 +1,146 @@
#!/bin/bash
DB_NAME="weatherlocate.db"
# Step 1: Backup existing data
echo "Backing up existing data..."
sqlite3 $DB_NAME <<EOF
.headers on
.mode csv
.output hourly_weather_backup.csv
SELECT * FROM HourlyWeather;
.output daily_weather_backup.csv
SELECT * FROM DailyWeather;
.output hours_backup.csv
SELECT * FROM Hours;
.output days_backup.csv
SELECT * FROM Days;
EOF
# Step 2: Drop and recreate tables
echo "Dropping and recreating tables..."
sqlite3 $DB_NAME <<EOF
DROP TABLE IF EXISTS HourlyWeather;
DROP TABLE IF EXISTS DailyWeather;
DROP TABLE IF EXISTS Hours;
DROP TABLE IF EXISTS Days;
CREATE TABLE HourlyWeather (
id INTEGER PRIMARY KEY,
datetime TEXT NOT NULL,
temp REAL,
feelslike REAL,
humidity REAL,
dew REAL,
precip REAL,
precipprob REAL,
snow REAL,
snowdepth REAL,
windgust REAL,
windspeed REAL,
winddir REAL,
pressure REAL,
cloudcover REAL,
visibility REAL,
solarradiation REAL,
solarenergy REAL,
uvindex REAL,
severerisk REAL,
conditions TEXT,
icon TEXT,
last_updated TEXT DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE DailyWeather (
id INTEGER PRIMARY KEY,
sunrise_time TEXT,
sunset_time TEXT,
description TEXT,
tempmax REAL,
tempmin REAL,
uvindex REAL,
winddir REAL,
windspeedmean REAL,
windspeed REAL,
icon TEXT,
last_updated TEXT DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE Hours (
id INTEGER PRIMARY KEY,
day_id INTEGER,
hour INTEGER,
hourly_weather_id INTEGER,
FOREIGN KEY (day_id) REFERENCES Days(id),
FOREIGN KEY (hourly_weather_id) REFERENCES HourlyWeather(id)
);
CREATE TABLE Days (
id INTEGER PRIMARY KEY,
date TEXT NOT NULL,
daily_weather_id INTEGER,
FOREIGN KEY (daily_weather_id) REFERENCES DailyWeather(id)
);
EOF
# Step 3: Import data from backup files
echo "Importing data from backup files..."
python3 <<EOF
import sqlite3
import csv
from datetime import datetime
def import_data():
conn = sqlite3.connect('$DB_NAME')
cursor = conn.cursor()
with open('hourly_weather_backup.csv', 'r') as file:
reader = csv.DictReader(file)
for row in reader:
cursor.execute('''
INSERT INTO HourlyWeather (datetime, temp, feelslike, humidity, dew, precip, precipprob, snow, snowdepth, windgust, windspeed, winddir, pressure, cloudcover, visibility, solarradiation, solarenergy, uvindex, severerisk, conditions, icon, last_updated)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
row['datetime'], row['temp'], row['feelslike'], row['humidity'], row['dew'], row['precip'],
row['precipprob'], row['snow'], row['snowdepth'], row['windgust'], row['windspeed'], row['winddir'],
row['pressure'], row['cloudcover'], row['visibility'], row['solarradiation'], row['solarenergy'], row['uvindex'],
row['severerisk'], row['conditions'], row['icon'],
datetime.strptime(row['last_updated'], '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H:%M:%S')
))
with open('daily_weather_backup.csv', 'r') as file:
reader = csv.DictReader(file)
for row in reader:
cursor.execute('''
INSERT INTO DailyWeather (sunrise_time, sunset_time, description, tempmax, tempmin, uvindex, winddir, windspeedmean, windspeed, icon, last_updated)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
row['sunrise_time'], row['sunset_time'], row['description'], row['tempmax'], row['tempmin'],
row['uvindex'], row['winddir'], row['windspeedmean'], row['windspeed'], row['icon'],
datetime.strptime(row['last_updated'], '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H:%M:%S')
))
with open('hours_backup.csv', 'r') as file:
reader = csv.DictReader(file)
for row in reader:
cursor.execute('''
INSERT INTO Hours (day_id, hour, hourly_weather_id)
VALUES (?, ?, ?)
''', (row['day_id'], row['hour'], row['hourly_weather_id']))
with open('days_backup.csv', 'r') as file:
reader = csv.DictReader(file)
for row in reader:
cursor.execute('''
INSERT INTO Days (date, daily_weather_id)
VALUES (?, ?)
''', (row['date'], row['daily_weather_id']))
conn.commit()
conn.close()
import_data()
EOF
echo "Database rebuild complete."


@ -0,0 +1,123 @@
import sqlite3
from pathlib import Path
# Get the home directory
home_dir = Path.home()
# Define the path to the database
DB = home_dir / "sync" / "sijapi" / "data" / "weatherlocate.db"
def create_database():
with sqlite3.connect(DB) as conn:
cursor = conn.cursor()
# Create the Locations table
cursor.execute('''
CREATE TABLE IF NOT EXISTS Locations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
street TEXT,
city TEXT,
state TEXT,
country TEXT,
latitude REAL,
longitude REAL,
zip TEXT,
elevation REAL,
last_updated DATETIME
);
''')
# Create the Days table with a direct reference to DailyWeather
cursor.execute('''
CREATE TABLE IF NOT EXISTS Days (
id INTEGER PRIMARY KEY AUTOINCREMENT,
date DATE UNIQUE NOT NULL,
daily_weather_id INTEGER,
general_location_id INTEGER,
FOREIGN KEY(daily_weather_id) REFERENCES DailyWeather(id),
FOREIGN KEY(general_location_id) REFERENCES Locations(id)
);
''')
# Create the DailyWeather table with fields adjusted for direct CSV storage of preciptype
cursor.execute('''
CREATE TABLE IF NOT EXISTS DailyWeather (
id INTEGER PRIMARY KEY AUTOINCREMENT,
sunrise TEXT,
sunriseEpoch TEXT,
sunset TEXT,
sunsetEpoch TEXT,
description TEXT,
tempmax REAL,
tempmin REAL,
uvindex INTEGER,
winddir REAL,
windspeed REAL,
icon TEXT,
last_updated DATETIME,
datetime TEXT,
datetimeEpoch INTEGER,
temp REAL,
feelslikemax REAL,
feelslikemin REAL,
feelslike REAL,
dew REAL,
humidity REAL,
precip REAL,
precipprob REAL,
precipcover REAL,
preciptype TEXT,
snow REAL,
snowdepth REAL,
windgust REAL,
pressure REAL,
cloudcover REAL,
visibility REAL,
solarradiation REAL,
solarenergy REAL,
severerisk REAL,
moonphase REAL,
conditions TEXT,
stations TEXT,
source TEXT
);
''')
# Create the HourlyWeather table
cursor.execute('''
CREATE TABLE IF NOT EXISTS HourlyWeather (
id INTEGER PRIMARY KEY AUTOINCREMENT,
day_id INTEGER,
datetime TEXT,
datetimeEpoch INTEGER,
temp REAL,
feelslike REAL,
humidity REAL,
dew REAL,
precip REAL,
precipprob REAL,
snow REAL,
snowdepth REAL,
preciptype TEXT,
windgust REAL,
windspeed REAL,
winddir REAL,
pressure REAL,
cloudcover REAL,
visibility REAL,
solarradiation REAL,
solarenergy REAL,
uvindex REAL,
severerisk REAL,
conditions TEXT,
icon TEXT,
stations TEXT,
source TEXT,
FOREIGN KEY(day_id) REFERENCES Days(id)
);
''')
conn.commit()
if __name__ == "__main__":
create_database()
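
A quick way to spot-check the schema once populated -- a hedged example query following Days.daily_weather_id to the daily summary (the date value is illustrative):

```python
import sqlite3
from pathlib import Path

DB = Path.home() / "sync" / "sijapi" / "data" / "weatherlocate.db"

with sqlite3.connect(DB) as conn:
    cur = conn.cursor()
    cur.execute('''
        SELECT d.date, w.tempmin, w.tempmax, w.conditions
        FROM Days d
        JOIN DailyWeather w ON d.daily_weather_id = w.id
        WHERE d.date = ?;
    ''', ("2024-06-01",))
    print(cur.fetchone())
```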


@ -0,0 +1,30 @@
from fastapi import FastAPI, HTTPException
from typing import List
import sqlite3
app = FastAPI()
def get_db_connection():
conn = sqlite3.connect('tracking.db')
conn.row_factory = sqlite3.Row # This enables column access by name: row['column_name']
return conn
@app.get("/location/{date}", response_model=List[str])
async def read_location_zip(date: str):
conn = get_db_connection()
cursor = conn.cursor()
print(f"Querying for date: {date}") # Debugging output
cursor.execute('''
SELECT L.zip FROM Hours H
JOIN Days D ON H.day_id = D.id
JOIN Locations L ON H.location_id = L.id
WHERE D.date = ?
''', (date,))
zips = cursor.fetchall()
print(f"Found zip codes: {zips}") # Debugging output
conn.close()
if not zips:
raise HTTPException(status_code=404, detail="No location data found for this date")
    return [row[0] for row in zips]  # avoid shadowing the built-in zip()
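
A usage sketch, assuming the app is served locally (host, port, and date are illustrative):

```python
import httpx

resp = httpx.get("http://localhost:8000/location/2024-06-01")
if resp.status_code == 200:
    print(resp.json())       # e.g. ['97401']
else:
    print(resp.status_code)  # 404 when no location data exists for the date
```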


@ -0,0 +1,89 @@
import osmium
import psycopg2
import json
from sijapi import DB_USER, DB_PASS, DB_HOST, DB, DATA_DIR
OSM_DATA_PATH = DATA_DIR / "north-america-latest.osm.pbf"
class OSMHandler(osmium.SimpleHandler):
def __init__(self, conn):
osmium.SimpleHandler.__init__(self)
self.conn = conn
def node(self, n):
tags = {tag.k: tag.v for tag in n.tags}
cur = self.conn.cursor()
cur.execute("""
INSERT INTO nodes (id, location, tags)
VALUES (%s, ST_SetSRID(ST_MAKEPOINT(%s, %s),4326), %s)
""",
(n.id, n.location.lon, n.location.lat, json.dumps(tags)))
self.conn.commit()
def way(self, w):
nodes = [(node.lon, node.lat) for node in w.nodes]
tags = {tag.k: tag.v for tag in w.tags}
cur = self.conn.cursor()
cur.execute("""
INSERT INTO ways (id, nodes, tags)
VALUES (%s, %s, %s)
""",
(w.id, json.dumps(nodes), json.dumps(tags)))
self.conn.commit()
def relation(self, r):
members = [{"type": m.type, "ref": m.ref, "role": m.role} for m in r.members]
tags = {tag.k: tag.v for tag in r.tags}
cur = self.conn.cursor()
cur.execute("""
INSERT INTO relations (id, members, tags)
VALUES (%s, %s, %s)
""",
(r.id, json.dumps(members), json.dumps(tags)))
self.conn.commit()
def main():
conn = psycopg2.connect(user=DB_USER, password=DB_PASS, dbname=DB, host=DB_HOST)
cur = conn.cursor()
# Drop existing tables if they exist
cur.execute("DROP TABLE IF EXISTS nodes")
cur.execute("DROP TABLE IF EXISTS ways")
cur.execute("DROP TABLE IF EXISTS relations")
# Create tables for nodes, ways, and relations
cur.execute("""
CREATE TABLE nodes (
id bigint PRIMARY KEY,
location geography(POINT, 4326),
tags jsonb
)
""")
cur.execute("""
CREATE TABLE ways (
id bigint PRIMARY KEY,
nodes jsonb,
tags jsonb
)
""")
cur.execute("""
CREATE TABLE relations (
id bigint PRIMARY KEY,
members jsonb,
tags jsonb
)
""")
conn.commit()
handler = OSMHandler(conn)
    # locations=True caches node coordinates so the way handler can read them
    handler.apply_file(str(OSM_DATA_PATH), locations=True)
cur.close()
conn.close()
if __name__ == "__main__":
main()
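
Once the import completes, a minimal sanity check might query for nodes near a point. This is a sketch that assumes PostGIS is enabled and uses stand-in coordinates; ST_DWithin on geography columns takes a distance in meters, and ST_MakePoint expects longitude before latitude:

import psycopg2
from sijapi import DB_USER, DB_PASS, DB_HOST, DB

conn = psycopg2.connect(user=DB_USER, password=DB_PASS, dbname=DB, host=DB_HOST)
cur = conn.cursor()
cur.execute("""
    SELECT id, tags FROM nodes
    WHERE ST_DWithin(location, ST_SetSRID(ST_MakePoint(%s, %s), 4326)::geography, 500)
    LIMIT 10
""", (-123.0868, 44.0505))
for node_id, tags in cur.fetchall():
    print(node_id, tags)
cur.close()
conn.close()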


@ -0,0 +1,95 @@
import sqlite3
import csv
import logging
def create_geonames_table(conn):
cursor = conn.cursor()
# Create table with required columns for geocode_location function
cursor.execute("""
CREATE TABLE IF NOT EXISTS geonames (
zip TEXT,
city TEXT,
state TEXT,
country_code TEXT,
latitude TEXT,
longitude TEXT,
region TEXT DEFAULT NULL,
altitude TEXT DEFAULT NULL,
street TEXT DEFAULT NULL
);
""")
conn.commit()
# Data importation
def import_geonames_data(conn, file_path):
cursor = conn.cursor()
with open(file_path, 'r', encoding='utf-8') as csvfile:
reader = csv.reader(csvfile)
next(reader) # Skip the header row
for row in reader:
if len(row) < 7: # Ensuring there are enough columns
logging.warning("Skipped a line due to insufficient data.")
continue
try:
cursor.execute("""
INSERT INTO geonames (zip, latitude, longitude, city, state, region, country_code, altitude, street)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);
""", (row[0], row[1], row[2], row[3], row[4], row[5], 'US', None, None))
except sqlite3.DatabaseError as e:
logging.error(f"Database error: {e}")
conn.commit()
# Geocode location based on ZIP
def geocode_location(conn, zip_code):
cursor = conn.cursor()
query = """SELECT latitude, longitude FROM geonames WHERE zip = ?;"""
cursor.execute(query, (zip_code,))
result = cursor.fetchone()
if result:
return result
else:
return None, None
# Test function to validate database setup and query
def test_geocode_function(conn):
# This tests a known ZIP code; replace '97401' with a ZIP code from your data
latitude, longitude = geocode_location(conn, '97401')
if latitude and longitude:
print(f"Test passed! Latitude: {latitude}, Longitude: {longitude}")
else:
print("Test failed. No data returned.")
# Example test within the same script or a separate test script
def test_geocode_function2(conn):
# Example data insertion for testing
conn.execute("INSERT INTO geonames (zip, city, state, country_code, latitude, longitude) VALUES ('99999', 'Testville', 'TestState', 'US', '45.0', '-93.0')")
conn.commit()
# Test retrieval
location_info = geocode_location(conn, zip_code='99999')
print(location_info)
# Assuming you call this test function from your main or setup
# Main execution function
def main():
logging.basicConfig(level=logging.INFO)
db_path = 'geonames.db'
file_path = 'US.csv'
conn = sqlite3.connect(db_path)
create_geonames_table(conn)
import_geonames_data(conn, file_path)
# Run the test
test_geocode_function(conn)
test_geocode_function2(conn)
conn.close()
if __name__ == "__main__":
main()
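
Because geocode_location filters on zip with no supporting index, lookups degrade to full table scans as the table grows. A minimal sketch of adding one after import (the index name is an arbitrary choice):

def create_zip_index(conn):
    cursor = conn.cursor()
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_geonames_zip ON geonames (zip);")
    conn.commit()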


@ -0,0 +1,76 @@
from vectordb import Memory
memory = Memory(
    memory_file="embedding.pt",
    chunking_strategy={"mode": "sliding_window", "window_size": 128, "overlap": 16},
    embeddings="TaylorAI/bge-micro-v2",
)
texts = [
"""
Machine learning is a method of data analysis that automates analytical model building.
It is a branch of artificial intelligence based on the idea that systems can learn from data,
identify patterns and make decisions with minimal human intervention.
Machine learning algorithms are trained on data sets that contain examples of the desired output. For example, a machine learning algorithm that is used to classify images might be trained on a data set that contains images of cats and dogs.
Once an algorithm is trained, it can be used to make predictions on new data. For example, the machine learning algorithm that is used to classify images could be used to predict whether a new image contains a cat or a dog.
Machine learning algorithms can be used to solve a wide variety of problems. Some common applications of machine learning include:
Classification: Categorizing data into different groups. For example, a machine learning algorithm could be used to classify emails as spam or not spam.
Regression: Predicting a continuous value. For example, a machine learning algorithm could be used to predict the price of a house.
Clustering: Finding groups of similar data points. For example, a machine learning algorithm could be used to find groups of customers with similar buying habits.
Anomaly detection: Finding data points that are different from the rest of the data. For example, a machine learning algorithm could be used to find fraudulent credit card transactions.
Machine learning is a powerful tool that can be used to solve a wide variety of problems. As the amount of data available continues to grow, machine learning is likely to become even more important in the future.
""",
"""
Artificial intelligence (AI) is the simulation of human intelligence in machines
that are programmed to think like humans and mimic their actions.
The term may also be applied to any machine that exhibits traits associated with
a human mind such as learning and problem-solving.
AI research has been highly successful in developing effective techniques for solving a wide range of problems, from game playing to medical diagnosis.
However, there is still a long way to go before AI can truly match the intelligence of humans. One of the main challenges is that human intelligence is incredibly complex and poorly understood.
Despite the challenges, AI is a rapidly growing field with the potential to revolutionize many aspects of our lives. Some of the potential benefits of AI include:
Increased productivity: AI can be used to automate tasks that are currently performed by humans, freeing up our time for more creative and fulfilling activities.
Improved decision-making: AI can be used to make more informed decisions, based on a wider range of data than humans can typically access.
Enhanced creativity: AI can be used to generate new ideas and solutions, beyond what humans can imagine on their own.
Of course, there are also potential risks associated with AI, such as:
Job displacement: As AI becomes more capable, it is possible that it will displace some human workers.
Weaponization: AI could be used to develop new weapons that are more powerful and destructive than anything we have today.
Loss of control: If AI becomes too powerful, we may lose control over it, with potentially disastrous consequences.
It is important to weigh the potential benefits and risks of AI carefully as we continue to develop this technology. With careful planning and oversight, AI has the potential to make the world a better place. However, if we are not careful, it could also lead to serious problems.
""",
]
metadata_list = [
{
"title": "Introduction to Machine Learning",
"url": "https://example.com/introduction-to-machine-learning",
},
{
"title": "Introduction to Artificial Intelligence",
"url": "https://example.com/introduction-to-artificial-intelligence",
},
]
memory.save(texts, metadata_list)
query = "What is the relationship between AI and machine learning?"
results = memory.search(query, top_n=3, unique=True)
print(results)
# only two results come back despite top_n=3: with unique=True, chunks from the same document are deduplicated, and only two documents were saved


@ -0,0 +1,15 @@
from vectordb import Memory
# Memory is where all content you want to store/search goes.
memory = Memory()
memory.save(
["apples are green", "oranges are orange"], # save your text content. for long text we will automatically chunk it
[{"url": "https://apples.com"}, {"url": "https://oranges.com"}], # associate any kind of metadata with it (optional)
)
# Search for top n relevant results, automatically using embeddings
query = "green"
results = memory.search(query, top_n = 1)
print(results)


@ -0,0 +1,46 @@
# Updates watch complications for Secure ShellFish
#
# This command sends encrypted data through push notifications such
# that it doesn't need to run from a Secure ShellFish terminal.
widget() {
  if [[ $# -eq 0 ]]; then
    cat <<EOF
Usage: widget [target] <data> ...

Updates the complication on the device from which this function was installed,
with any number of content parameters that can be string, progress, icon,
target or color. Each argument's type is derived from the input:

  Progress has the form 50% or 110/220.
  Icons must match a valid SF Symbol name such as globe or terminal.fill.
  Colors must be hex colours such as #000 or #ff00ff; a color applies to the
    content that follows it, and 'foreground' switches back to the default.
  Target sends different content to different complications, after the
    complications have been configured with distinct target identifiers
    (requires the pro unlock). A target is never assumed unless --target is
    used, and stays in effect until the next --target parameter, so a single
    command can update several complications; updating several at once allows
    more total updates per day. You can configure complications to only show
    content for a given target.
  String is the fallback type if nothing else matches.

A content type can be forced for the next parameter with --progress, --icon,
--color, --text or --target, e.g.:

  widget --text "50/100"
EOF
    return 0
  fi
  local key=d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b
  local user=WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm
  local iv=ab5bbeb426015da7eedcee8bee3dffb7
  local plain=$(
    echo Secure ShellFish Widget 2.0
    for var in "$@"
    do
      echo -ne "$var" | base64
    done)
  local base64=$(echo "$plain" | openssl enc -aes-256-cbc -base64 -K $key -iv $iv)
  curl -sS -X POST -H "Content-Type: text/plain" --data "$base64" "https://secureshellfish.app/push/?user=$user"
}

@ -0,0 +1,17 @@
#!/bin/bash

# Iterate over each day of June 2024
for i in $(seq -w 01 30); do
    # Construct the date string
    DATE="2024-06-${i}"

    # Print the date being processed (optional)
    echo "Processing date: $DATE"

    # Run the curl command; it executes in the foreground, so each request
    # completes before the next iteration begins
    curl -X POST -H "Content-Type: application/json" -d '{"mood": "joyful"}' "http://localhost:4444/note/banner?dt=$DATE"
done


@ -0,0 +1,71 @@
import requests
import os
import re
import sys
from datetime import datetime, timedelta
from sijapi import BASE_URL, OBSIDIAN_JOURNAL_DIR, GLOBAL_API_KEY
def create_folder_path(date):
year = date.strftime("%Y")
month = date.strftime("%Y-%m %B")
day = date.strftime("%Y-%m-%d %A")
path = f"{OBSIDIAN_JOURNAL_DIR}/{year}/{month}/{day}"
os.makedirs(path, exist_ok=True)
return f"{path}/{day}.md"
def fetch_markdown(date):
url = f"{BASE_URL}/time/markdown2?start_date={date.strftime('%Y-%m-%d')}"
headers = {'Authorization': f'Bearer {GLOBAL_API_KEY}'}
response = requests.get(url, headers=headers)
if response.status_code == 200:
return response.text
else:
print(f"Failed to fetch data for {date.strftime('%Y-%m-%d')}: HTTP {response.status_code}")
return None
def update_markdown_file(file_path, new_markdown):
# table_regex = r"(?ms)^## Timeslips\s*\n{1,3}\|.*?\|.*?\|.*?\|\s*\n"
# table_regex = r"(?ms)^## Timeslips\s*\n{1,3}(\|.*?\|.*?\|.*?\|\s*\n)+\|[- ]+\|[- ]+\|[- ]+\|\s*\n"
# table_regex = r"(?ms)^## Timeslips\s*\n{1,3}(\|\s*[^|\n]+?\s*\|)+\s*\n(\|\s*-+\s*\|)+\s*\n(\|\s*[^|\n]*?\s*\|+\s*\n)+?\|\s*TOTAL\s*\|[^|\n]*\|\s*[^|\n]*\|\s*$"
table_regex = r"(?ms)^## Timeslips\s*\n{1,3}(\|[^|\r\n]*\|[^|\r\n]*\|[^|\r\n]*\|\s*\n)+\|\s*-+\s*\|\s*-+\s*\|\s*-+\s*\|\s*\n((\|[^|\r\n]*\|[^|\r\n]*\|[^|\r\n]*\|\s*\n)+)\|\s*TOTAL\s*\|\s*\|\s*[^|\r\n]*\|\s*$"
try:
with open(file_path, 'r+') as file:
content = file.read()
if re.search(table_regex, content):
updated_content = re.sub(table_regex, new_markdown, content)
else:
updated_content = content + '\n' + new_markdown
file.seek(0)
file.write(updated_content)
file.truncate()
except FileNotFoundError:
with open(file_path, 'w') as file:
file.write(new_markdown)
def main():
if len(sys.argv) < 2 or len(sys.argv) > 3:
print("Usage: python populateWorklogs.py <start_date> [end_date]")
sys.exit(1)
start_date_str = sys.argv[1]
if len(sys.argv) == 3:
end_date_str = sys.argv[2]
else:
end_date_str = start_date_str
start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
end_date = datetime.strptime(end_date_str, '%Y-%m-%d')
current_date = start_date
while current_date <= end_date:
file_path = create_folder_path(current_date)
markdown = fetch_markdown(current_date)
if markdown:
update_markdown_file(file_path, markdown)
current_date += timedelta(days=1)
if __name__ == "__main__":
main()
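
For example, running python populateWorklogs.py 2024-06-01 2024-06-07 backfills a week of journal files, while a single date argument processes just that one day.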

File diff suppressed because it is too large

65
sijapi/logs.py Normal file

@ -0,0 +1,65 @@
import logging
from logging.handlers import RotatingFileHandler
from colorama import Fore, Style, init as colorama_init
import traceback
colorama_init(autoreset=True)
class ColorFormatter(logging.Formatter):
"""Custom formatter to add colors to log levels."""
COLOR_MAP = {
logging.DEBUG: Fore.GREEN,
logging.INFO: Fore.LIGHTBLUE_EX,
logging.WARNING: Fore.YELLOW,
logging.ERROR: Fore.RED,
logging.CRITICAL: Fore.MAGENTA,
}
    def format(self, record):
        # Colorize only the level name for INFO; the message itself stays uncolored
        if record.levelno == logging.INFO:
            record.levelname = f"{self.COLOR_MAP[logging.INFO]}{record.levelname}{Style.RESET_ALL}"
else:
# Colorize the entire message for other levels
color = self.COLOR_MAP.get(record.levelno, Fore.WHITE)
record.msg = f"{color}{record.msg}{Style.RESET_ALL}"
record.levelname = f"{color}{record.levelname}{Style.RESET_ALL}"
return super().format(record)
def get_level(level_str):
"""Convert a log level string to a logging level constant."""
level_str = level_str.upper()
if level_str == "DEBUG":
return logging.DEBUG
elif level_str == "INFO":
return logging.INFO
elif level_str == "WARNING":
return logging.WARNING
elif level_str == "ERROR":
return logging.ERROR
elif level_str == "CRITICAL":
return logging.CRITICAL
else:
raise ValueError(f"Invalid log level: {level_str}")
class Logger:
def __init__(self, logs_dir, log_level_str="INFO"):
self.logs_dir = logs_dir
self.log_level_str = log_level_str
self.logger = logging.getLogger("LOG")
self.setup()
def setup(self):
"""Function to setup the logger, clears any existing handlers first"""
log_level = get_level(self.log_level_str)
self.logger.setLevel(log_level)
handler = RotatingFileHandler(f'{self.logs_dir}/app.log', maxBytes=2000000, backupCount=10)
console_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
color_formatter = ColorFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
console_handler.setFormatter(color_formatter)
self.logger.addHandler(handler)
self.logger.addHandler(console_handler)
self.logger.propagate = False
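
A minimal usage sketch (the directory and level here are illustrative): instantiate Logger once, then log through its .logger attribute.

from pathlib import Path
from sijapi.logs import Logger

logs_dir = Path("logs")
logs_dir.mkdir(exist_ok=True)
logger = Logger(logs_dir, "DEBUG").logger
logger.debug("green on the console, plain in logs/app.log")
logger.error("red on the console and in the file log")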

165
sijapi/routers/asr.py Normal file

@ -0,0 +1,165 @@
'''
Automatic Speech Recognition module relying on the `whisper_cpp` implementation of OpenAI's Whisper model.
Depends on:
LOGGER, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR
Notes:
Performs exceptionally well on Apple Silicon. Other devices will benefit from future updates to optionally use `faster_whisper`, `insanely_faster_whisper`, and/or `whisper_jax`.
'''
from fastapi import APIRouter, HTTPException, Form, UploadFile, File
from fastapi.responses import JSONResponse, FileResponse
from pydantic import BaseModel, Field, HttpUrl
from typing import Optional
from urllib.parse import unquote
from threading import Thread
from whisperplus.pipelines import mlx_whisper
from youtube_dl import YoutubeDL
import asyncio
import json
import multiprocessing
import os
import subprocess
import tempfile
import uuid
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR, MAX_CPU_CORES
asr = APIRouter()
class TranscribeParams(BaseModel):
model: str = Field(default="small")
output_srt : Optional[bool] = Field(default=False)
language : Optional[str] = Field(None)
split_on_word : Optional[bool] = Field(default=False)
temperature : Optional[float] = Field(default=0)
temp_increment : Optional[int] = Field(None)
translate : Optional[bool] = Field(default=False)
diarize : Optional[bool] = Field(default=False)
tiny_diarize : Optional[bool] = Field(default=False)
no_fallback : Optional[bool] = Field(default=False)
output_json : Optional[bool] = Field(default=False)
detect_language : Optional[bool] = Field(default=False)
dtw : Optional[str] = Field(None)
threads : Optional[int] = Field(None)
@asr.post("/asr")
@asr.post("/transcribe")
@asr.post("/v1/audio/transcription")
async def transcribe_endpoint(
file: UploadFile = File(...),
params: str = Form(...)
):
try:
# Decode the URL-encoded string
decoded_params = unquote(params)
# Parse the JSON string
parameters_dict = json.loads(decoded_params)
# Create TranscribeParams object
parameters = TranscribeParams(**parameters_dict)
except json.JSONDecodeError as json_err:
raise HTTPException(status_code=400, detail=f"Invalid JSON: {str(json_err)}")
except Exception as err:
raise HTTPException(status_code=400, detail=f"Error parsing parameters: {str(err)}")
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
temp_file.write(await file.read())
temp_file_path = temp_file.name
transcription = await transcribe_audio(file_path=temp_file_path, params=parameters)
return transcription
async def transcribe_audio(file_path, params: TranscribeParams):
file_path = convert_to_wav(file_path)
model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'
command = [str(WHISPER_CPP_DIR / 'build' / 'bin' / 'main')]
command.extend(['-m', str(model_path)])
command.extend(['-t', str(max(1, min(params.threads or MAX_CPU_CORES, MAX_CPU_CORES)))])
command.extend(['-np']) # Always enable no-prints
if params.split_on_word:
command.append('-sow')
if params.temperature > 0:
command.extend(['-tp', str(params.temperature)])
if params.temp_increment:
command.extend(['-tpi', str(params.temp_increment)])
if params.language:
command.extend(['-l', params.language])
elif params.detect_language:
command.append('-dl')
if params.translate:
command.append('-tr')
if params.diarize:
command.append('-di')
if params.tiny_diarize:
command.append('-tdrz')
if params.no_fallback:
command.append('-nf')
if params.output_srt:
command.append('-osrt')
elif params.output_json:
command.append('-oj')
else:
command.append('-nt')
if params.dtw:
command.extend(['--dtw', params.dtw])
command.extend(['-f', file_path])
DEBUG(f"Command: {command}")
proc = await asyncio.create_subprocess_exec(
*command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await proc.communicate()
if proc.returncode != 0:
raise Exception(f"Error running command: {stderr.decode()}")
result = stdout.decode().strip()
DEBUG(f"Result: {result}")
return result
def convert_to_wav(file_path: str):
wav_file_path = os.path.join(ASR_DIR, f"{uuid.uuid4()}.wav")
subprocess.run(["ffmpeg", "-y", "-i", file_path, "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", wav_file_path], check=True)
return wav_file_path
def download_from_youtube(url: str):
temp_file = os.path.join(ASR_DIR, f"{uuid.uuid4()}.mp3")
ytdl_opts = {
'outtmpl': temp_file,
'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '192'}],
'nooverwrites': True
}
with YoutubeDL(ytdl_opts) as ydl:
ydl.download([url])
return convert_to_wav(temp_file)
def format_srt_timestamp(seconds: float):
milliseconds = round(seconds * 1000.0)
hours = milliseconds // 3_600_000
milliseconds -= hours * 3_600_000
minutes = milliseconds // 60_000
milliseconds -= minutes * 60_000
seconds = milliseconds // 1_000
milliseconds -= seconds * 1_000
return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
def write_srt(segments: list, output_file: str):
with open(output_file, 'w') as f:
for i, segment in enumerate(segments, start=1):
start = format_srt_timestamp(segment['start'])
end = format_srt_timestamp(segment['end'])
text = segment['text']
f.write(f"{i}\n{start} --> {end}\n{text}\n\n")

415
sijapi/routers/calendar.py Normal file

@ -0,0 +1,415 @@
'''
Calendar module using macOS Calendars and/or Microsoft 365 via its Graph API.
Depends on:
LOGGER, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
'''
from fastapi import APIRouter, Depends, HTTPException, status, Request
from fastapi.responses import RedirectResponse, JSONResponse
from fastapi.security import OAuth2PasswordBearer
import httpx
import json
import os
import time
from dateutil.parser import isoparse as parse_iso
import threading
from typing import Dict, List, Any
from datetime import datetime, timedelta
from Foundation import NSDate, NSRunLoop
import EventKit as EK
from sijapi import ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
from sijapi.utilities import localize_dt
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
calendar = APIRouter()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token")
timeout = httpx.Timeout(12)
if MS365_TOGGLE is True:
CRITICAL(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
@calendar.get("/o365/login")
async def login():
DEBUG(f"Received request to /o365/login")
DEBUG(f"SCOPE: {MS365_SCOPE}")
if not MS365_SCOPE:
ERR("No scopes defined for authorization.")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="No scopes defined for authorization."
)
authorization_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize?client_id={MS365_CLIENT_ID}&response_type=code&redirect_uri={MS365_REDIRECT_PATH}&scope={'+'.join(MS365_SCOPE)}"
INFO(f"Redirecting to authorization URL: {authorization_url}")
return RedirectResponse(authorization_url)
@calendar.get("/o365/oauth_redirect")
async def oauth_redirect(code: str = None, error: str = None):
INFO(f"Received request to /o365/oauth_redirect")
if error:
ERR(f"OAuth2 Error: {error}")
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error"
)
INFO(f"Requesting token with authorization code: {code}")
token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token"
data = {
"client_id": MS365_CLIENT_ID,
"client_secret": MS365_SECRET,
"code": code,
"redirect_uri": MS365_REDIRECT_PATH,
"grant_type": "authorization_code"
}
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.post(token_url, data=data)
DEBUG(f"Token endpoint response status code: {response.status_code}")
INFO(f"Token endpoint response text: {response.text}")
result = response.json()
if 'access_token' in result:
await save_token(result)
INFO("Access token obtained successfully")
return {"message": "Access token stored successfully"}
else:
CRITICAL(f"Failed to obtain access token. Response: {result}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to obtain access token"
)
@calendar.get("/o365/me")
async def read_items():
INFO(f"Received request to /o365/me")
token = await load_token()
if not token:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Access token not found",
)
graph_url = "https://graph.microsoft.com/v1.0/me"
headers = {"Authorization": f"Bearer {token['access_token']}"}
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.get(graph_url, headers=headers)
if response.status_code == 200:
user = response.json()
INFO(f"User retrieved: {user}")
return user
else:
ERR("Invalid or expired token")
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid or expired token",
headers={"WWW-Authenticate": "Bearer"},
)
async def save_token(token):
DEBUG(f"Saving token: {token}")
try:
token["expires_at"] = int(time.time()) + token["expires_in"]
with open(MS365_TOKEN_PATH, "w") as file:
json.dump(token, file)
DEBUG(f"Saved token to {MS365_TOKEN_PATH}")
except Exception as e:
ERR(f"Failed to save token: {e}")
async def load_token():
if os.path.exists(MS365_TOKEN_PATH):
try:
with open(MS365_TOKEN_PATH, "r") as file:
token = json.load(file)
except FileNotFoundError:
ERR("Token file not found.")
return None
except json.JSONDecodeError:
ERR("Failed to decode token JSON")
return None
        if token:
            # expires_at was set when the token was saved; recomputing it here would defeat the expiry check
            DEBUG(f"Loaded token: {token}")
            return token
else:
DEBUG("No token found.")
return None
else:
WARN(f"No file found at {MS365_TOKEN_PATH}")
return None
async def is_token_expired(token):
if "expires_at" not in token:
return True # Treat missing expiration time as expired token
expiry_time = datetime.fromtimestamp(token["expires_at"])
return expiry_time <= datetime.now()
async def is_token_expired2(token):
graph_url = "https://graph.microsoft.com/v1.0/me"
headers = {"Authorization": f"Bearer {token}"}
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.get(graph_url, headers=headers)
return response.status_code == 401
async def get_new_token_with_refresh_token(refresh_token):
token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token"
data = {
"client_id": MS365_CLIENT_ID,
"client_secret": MS365_SECRET,
"refresh_token": refresh_token,
"grant_type": "refresh_token",
"scope": " ".join(MS365_SCOPE),
}
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.post(token_url, data=data)
result = response.json()
if "access_token" in result:
INFO("Access token refreshed successfully")
return result
else:
ERR("Failed to refresh access token")
return None
async def refresh_token():
token = await load_token()
if not token:
ERR("No token found in storage")
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="No token found",
)
if 'refresh_token' not in token:
ERR("Refresh token not found in the loaded token")
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Refresh token not found",
)
refresh_token = token['refresh_token']
DEBUG("Found refresh token, attempting to refresh access token")
new_token = await get_new_token_with_refresh_token(refresh_token)
if new_token:
await save_token(new_token)
INFO("Token refreshed and saved successfully")
else:
ERR("Failed to refresh token")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to refresh token",
)
def get_calendar_ids() -> Dict[str, str]:
event_store = EK.EKEventStore.alloc().init()
all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent
calendar_identifiers = {
calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars
}
INFO(f"{calendar_identifiers}")
return calendar_identifiers
# Helper to convert datetime to NSDate
def datetime_to_nsdate(dt: datetime) -> NSDate:
return NSDate.dateWithTimeIntervalSince1970_(dt.timestamp())
@calendar.get("/events")
async def get_events_endpoint(start_date: str, end_date: str):
start_dt = localize_dt(start_date)
end_dt = localize_dt(end_date)
datetime.strptime(start_date, "%Y-%m-%d") or datetime.now()
end_dt = datetime.strptime(end_date, "%Y-%m-%d") or datetime.now()
response = await get_events(start_dt, end_dt)
return JSONResponse(content=response, status_code=200)
async def get_events(start_dt: datetime, end_dt: datetime) -> List:
combined_events = []
if MS365_TOGGLE:
ms_events = await get_ms365_events(start_dt, end_dt)
combined_events.extend(ms_events) # Use extend instead of append
if ICAL_TOGGLE:
calendar_ids = ICALENDARS
macos_events = get_macos_calendar_events(start_dt, end_dt, calendar_ids)
combined_events.extend(macos_events) # Use extend instead of append
parsed_events = await parse_calendar_for_day(start_dt, end_dt, combined_events)
return parsed_events
def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]:
event_store = EK.EKEventStore.alloc().init()
# Request access to EventKit
def request_access() -> bool:
access_granted = []
def completion_handler(granted, error):
if error is not None:
ERR(f"Error: {error}")
access_granted.append(granted)
# Notify the main thread that the completion handler has executed
with access_granted_condition:
access_granted_condition.notify()
access_granted_condition = threading.Condition()
with access_granted_condition:
event_store.requestAccessToEntityType_completion_(0, completion_handler) # 0 corresponds to EKEntityTypeEvent
# Wait for the completion handler to be called
access_granted_condition.wait(timeout=10)
# Verify that the handler was called and access_granted is not empty
if access_granted:
return access_granted[0]
else:
ERR("Request access timed out or failed")
return False
if not request_access():
ERR("Access to calendar data was not granted")
return []
ns_start_date = datetime_to_nsdate(start_date)
ns_end_date = datetime_to_nsdate(end_date)
# Retrieve all calendars
all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent
if calendar_ids:
selected_calendars = [cal for cal in all_calendars if cal.calendarIdentifier() in calendar_ids]
else:
selected_calendars = all_calendars
# Filtering events by selected calendars
predicate = event_store.predicateForEventsWithStartDate_endDate_calendars_(ns_start_date, ns_end_date, selected_calendars)
events = event_store.eventsMatchingPredicate_(predicate)
event_list = []
for event in events:
# Check if event.attendees() returns None
if event.attendees():
attendees = [{'name': att.name(), 'email': att.emailAddress()} for att in event.attendees() if att.emailAddress()]
else:
attendees = []
# Format the start and end dates properly
start_date_str = event.startDate().descriptionWithLocale_(None)
end_date_str = event.endDate().descriptionWithLocale_(None)
event_data = {
"subject": event.title(),
"id": event.eventIdentifier(),
"start": start_date_str,
"end": end_date_str,
"bodyPreview": event.notes() if event.notes() else '',
"attendees": attendees,
"location": event.location() if event.location() else '',
"onlineMeetingUrl": '', # Defaulting to empty as macOS EventKit does not provide this
"showAs": 'busy', # Default to 'busy'
"isAllDay": event.isAllDay()
}
event_list.append(event_data)
return event_list
async def get_ms365_events(start_date: datetime, end_date: datetime):
token = await load_token()
if token:
if await is_token_expired(token):
await refresh_token()
else:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Access token not found",
)
# this looks like it might need updating to use tz-aware datetimes converted to UTC...
graph_url = f"https://graph.microsoft.com/v1.0/me/events?$filter=start/dateTime ge '{start_date}T00:00:00' and end/dateTime le '{end_date}T23:59:59'"
headers = {
"Authorization": f"Bearer {token['access_token']}",
"Prefer": 'outlook.timezone="Pacific Standard Time"',
}
async with httpx.AsyncClient() as client:
response = await client.get(graph_url, headers=headers)
if response.status_code != 200:
ERR("Failed to retrieve events from Microsoft 365")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to retrieve events",
)
ms_events = response.json().get("value", [])
return ms_events
async def parse_calendar_for_day(range_start: datetime, range_end: datetime, events: List[Dict[str, Any]]):
range_start = localize_dt(range_start)
range_end = localize_dt(range_end)
event_list = []
for event in events:
INFO(f"Event: {event}")
start_str = event.get('start')
end_str = event.get('end')
if isinstance(start_str, dict):
start_str = start_str.get('dateTime')
else:
INFO(f"Start date string not a dict")
if isinstance(end_str, dict):
end_str = end_str.get('dateTime')
else:
INFO(f"End date string not a dict")
try:
start_date = localize_dt(start_str) if start_str else None
except (ValueError, TypeError) as e:
ERR(f"Invalid start date format: {start_str}, error: {e}")
continue
try:
end_date = localize_dt(end_str) if end_str else None
except (ValueError, TypeError) as e:
ERR(f"Invalid end date format: {end_str}, error: {e}")
continue
DEBUG(f"Comparing {start_date} with range {range_start} to {range_end}")
if start_date:
# Ensure start_date is timezone-aware
start_date = localize_dt(start_date)
# If end_date is not provided, assume it's the same as start_date
if not end_date:
end_date = start_date
else:
end_date = localize_dt(end_date)
# Check if the event overlaps with the given range
if (start_date < range_end) and (end_date > range_start):
attendees = [{'name': att['name'], 'email': att['email']} for att in event.get('attendees', []) if 'name' in att and 'email' in att]
location = event.get('location', '')
if isinstance(location, dict):
location = location.get('displayName', '')
event_data = {
"name": event.get('subject', ''),
"uid": event.get('id', ''),
"start": start_date.strftime('%H:%M'),
"end": end_date.strftime('%H:%M') if end_date else '',
"description": event.get('bodyPreview', ''),
"attendees": attendees,
"location": location,
"url": event.get('onlineMeetingUrl', ''),
"busystatus": event.get('showAs', ''),
"busy": event.get('showAs', '') in ['busy', 'tentative'],
"all_day": event.get('isAllDay', False)
}
INFO(f"Event_data: {event_data}")
event_list.append(event_data)
else:
DEBUG(f"Event outside of specified range: {start_date} to {end_date}")
else:
ERR(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
return event_list
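
A sketch of querying the endpoint (host, port, and dates are illustrative); the returned dictionaries carry the fields assembled in parse_calendar_for_day:

import requests

resp = requests.get(
    "http://localhost:4444/events",
    params={"start_date": "2024-06-01", "end_date": "2024-06-02"},
)
for event in resp.json():
    print(event["start"], event["end"], event["name"])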

209
sijapi/routers/cf.py Normal file

@ -0,0 +1,209 @@
'''
IN DEVELOPMENT - Cloudflare + Caddy module. Based on a bash script that's able to rapidly deploy new Cloudflare subdomains on new Caddy reverse proxy configurations, managing everything including restarting Caddy. The Python version needs more testing before actual use.
'''
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from fastapi.responses import PlainTextResponse, JSONResponse
from typing import Optional
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP
import httpx
import asyncio
from asyncio import sleep
import os
cf = APIRouter()
class DNSRecordRequest(BaseModel):
full_domain: str
ip: Optional[str] = None
port: str
# Update to make get_zone_id async
async def get_zone_id(domain: str) -> str:
url = f"{CF_API_BASE_URL}/zones"
headers = {
"Authorization": f"Bearer {CF_TOKEN}",
"Content-Type": "application/json"
}
params = {"name": domain}
async with httpx.AsyncClient() as client:
response = await client.get(url, headers=headers, params=params)
response.raise_for_status()
data = response.json()
if data['success']:
if len(data['result']) > 0:
return data['result'][0]['id']
else:
raise ValueError(f"No Zone ID found for domain '{domain}'")
else:
errors = ', '.join(err['message'] for err in data['errors'])
raise ValueError(f"Cloudflare API returned errors: {errors}")
async def update_caddyfile(full_domain, caddy_ip, port):
caddy_config = f"""
{full_domain} {{
reverse_proxy {caddy_ip}:{port}
tls {{
        dns cloudflare {{env.CLOUDFLARE_API_TOKEN}}
}}
}}
"""
with open(CADDYFILE_PATH, 'a') as file:
file.write(caddy_config)
# Using asyncio to create subprocess
proc = await asyncio.create_subprocess_exec("sudo", "systemctl", "restart", "caddy")
await proc.communicate()
# Retry mechanism for API calls
async def retry_request(url, headers, max_retries=5, backoff_factor=1):
for retry in range(max_retries):
try:
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(url, headers=headers)
response.raise_for_status()
return response
except (httpx.HTTPError, httpx.ConnectTimeout) as e:
ERR(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
await sleep(backoff_factor * (2 ** retry))
raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request")
# Helper function to load Caddyfile domains
def load_caddyfile_domains():
with open(CADDYFILE_PATH, 'r') as file:
caddyfile_content = file.read()
domains = []
for line in caddyfile_content.splitlines():
if line.strip() and not line.startswith('#'):
if "{" in line:
domain = line.split("{")[0].strip()
domains.append(domain)
return domains
# Endpoint to add new configuration to Cloudflare, Caddyfile, and cf_domains.json
@cf.post("/cf/add_config")
async def add_config(record: DNSRecordRequest):
full_domain = record.full_domain
caddy_ip = record.ip or "localhost"
port = record.port
# Extract subdomain and domain
parts = full_domain.split(".")
if len(parts) == 2:
domain = full_domain
subdomain = "@"
else:
subdomain = parts[0]
domain = ".".join(parts[1:])
zone_id = await get_zone_id(domain)
if not zone_id:
raise HTTPException(status_code=400, detail=f"Zone ID for {domain} could not be found")
# API call setup for Cloudflare A record
endpoint = f"{CF_API_BASE_URL}/zones/{zone_id}/dns_records"
headers = {
"Authorization": f"Bearer {CF_TOKEN}",
"Content-Type": "application/json"
}
data = {
"type": "A",
"name": subdomain,
"content": CF_IP,
"ttl": 120,
"proxied": True
}
async with httpx.AsyncClient() as client:
response = await client.post(endpoint, headers=headers, json=data)
result = response.json()
if not result.get("success", False):
error_message = result.get("errors", [{}])[0].get("message", "Unknown error")
error_code = result.get("errors", [{}])[0].get("code", "Unknown code")
raise HTTPException(status_code=400, detail=f"Failed to create A record: {error_message} (Code: {error_code})")
# Update Caddyfile
await update_caddyfile(full_domain, caddy_ip, port)
return {"message": "Configuration added successfully"}
@cf.get("/cf/list_zones")
async def list_zones_endpoint():
domains = await list_zones()
return JSONResponse(domains)
async def list_zones():
endpoint = f"{CF_API_BASE_URL}/zones"
headers = {
"Authorization": f"Bearer {CF_TOKEN}",
"Content-Type": "application/json"
}
async with httpx.AsyncClient() as client: # async http call
response = await client.get(endpoint, headers=headers)
response.raise_for_status()
result = response.json()
if not result.get("success"):
raise HTTPException(status_code=400, detail="Failed to retrieve zones from Cloudflare")
zones = result.get("result", [])
domains = {}
for zone in zones:
zone_id = zone.get("id")
zone_name = zone.get("name")
domains[zone_name] = {"zone_id": zone_id}
records_endpoint = f"{CF_API_BASE_URL}/zones/{zone_id}/dns_records"
async with httpx.AsyncClient() as client: # async http call
records_response = await client.get(records_endpoint, headers=headers)
records_result = records_response.json()
if not records_result.get("success"):
raise HTTPException(status_code=400, detail=f"Failed to retrieve DNS records for zone {zone_name}")
records = records_result.get("result", [])
for record in records:
record_id = record.get("id")
domain_name = record.get("name").replace(f".{zone_name}", "")
domains[zone_name].setdefault(domain_name, {})["dns_id"] = record_id
return domains
@cf.get("/cf/compare_caddy", response_class=PlainTextResponse)
async def crossreference_caddyfile():
cf_domains_data = await list_zones()
caddyfile_domains = load_caddyfile_domains()
    # list_zones keys each record name directly on the zone dict, alongside zone_id
    cf_domains_list = [
        f"{sub}.{domain}" if sub not in ("@", domain) else domain
        for domain, data in cf_domains_data.items()
        for sub in (key for key in data.keys() if key != "zone_id")
    ]
caddyfile_domains_set = set(caddyfile_domains)
cf_domains_set = set(cf_domains_list)
only_in_caddyfile = caddyfile_domains_set - cf_domains_set
only_in_cf_domains = cf_domains_set - caddyfile_domains_set
markdown_output = "# Cross-reference cf_domains.json and Caddyfile\n\n"
markdown_output += "## Domains only in Caddyfile:\n\n"
for domain in only_in_caddyfile:
markdown_output += f"- **{domain}**\n"
markdown_output += "\n## Domains only in cf_domains.json:\n\n"
for domain in only_in_cf_domains:
markdown_output += f"- **{domain}**\n"
return markdown_output
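
A sketch of registering a new subdomain through the endpoint (the domain, port, and host below are placeholders):

import requests

payload = {"full_domain": "demo.example.com", "ip": "localhost", "port": "8080"}
resp = requests.post("http://localhost:4444/cf/add_config", json=payload)
print(resp.json())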

253
sijapi/routers/email.py Normal file

@ -0,0 +1,253 @@
'''
IN DEVELOPMENT: Email module. Uses IMAP and SMTP login credentials to monitor an inbox, summarize incoming emails that match certain criteria, and save the text-to-speech-converted summaries into a specified "podcast" folder.
UNIMPLEMENTED: AI auto-responder.
'''
from fastapi import APIRouter
import asyncio
from imbox import Imbox
from bs4 import BeautifulSoup
import os
from pathlib import Path
from shutil import move
import tempfile
import re
import ssl
from smtplib import SMTP_SSL
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from datetime import datetime as dt_datetime
from pydantic import BaseModel
from typing import List, Optional, Any
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOME_DIR, DATA_DIR, OBSIDIAN_VAULT_DIR, PODCAST_DIR, IMAP, OBSIDIAN_JOURNAL_DIR, DEFAULT_VOICE, AUTORESPONSE_BLACKLIST, AUTORESPONSE_WHITELIST, AUTORESPONSE_CONTEXT, USER_FULLNAME, USER_BIO, AUTORESPOND, TZ
from sijapi.routers import summarize, tts, llm
from sijapi.utilities import clean_text, assemble_journal_path, localize_dt, extract_text, prefix_lines
email = APIRouter(tags=["private"])
class Contact(BaseModel):
email: str
name: str
class EmailModel(BaseModel):
sender: str
recipients: List[Contact]
datetime_received: dt_datetime
subject: str
body: str
attachments: Optional[List[Any]] = None
def imap_conn():
return Imbox(IMAP.host,
username=IMAP.email,
password=IMAP.password,
port=IMAP.imap_port,
ssl=IMAP.imap_encryption == 'SSL',
starttls=IMAP.imap_encryption == 'STARTTLS')
def clean_email_content(html_content):
soup = BeautifulSoup(html_content, "html.parser")
return re.sub(r'[ \t\r\n]+', ' ', soup.get_text()).strip()
async def extract_attachments(attachments) -> List[str]:
attachment_texts = []
for attachment in attachments:
attachment_name = attachment.get('filename', 'tempfile.txt')
_, ext = os.path.splitext(attachment_name)
ext = ext.lower() if ext else '.txt'
with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_file:
tmp_file.write(attachment['content'].getvalue())
tmp_file_path = tmp_file.name
try:
attachment_text = await extract_text(tmp_file_path)
attachment_texts.append(attachment_text)
finally:
if os.path.exists(tmp_file_path):
os.remove(tmp_file_path)
return attachment_texts
async def process_unread_emails(auto_respond: bool = AUTORESPOND, summarize_emails: bool = True, podcast: bool = True):
while True:
try:
with imap_conn() as inbox:
unread_messages = inbox.messages(unread=True)
for uid, message in unread_messages:
recipients = [Contact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to]
this_email = EmailModel(
sender=message.sent_from[0]['email'],
datetime_received=localize_dt(message.date),
recipients=recipients,
subject=message.subject,
body=clean_email_content(message.body['html'][0]) if message.body['html'] else clean_email_content(message.body['plain'][0]) or "",
attachments=message.attachments
)
DEBUG(f"\n\nProcessing email: {this_email.subject}\n\n")
md_path, md_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".md")
tts_path, tts_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".wav")
if summarize_emails:
email_content = f'At {this_email.datetime_received}, {this_email.sender} sent an email with the subject line "{this_email.subject}". The email in its entirety reads: \n\n{this_email.body}\n"'
if this_email.attachments:
attachment_texts = await extract_attachments(this_email.attachments)
email_content += "\n—--\n" + "\n—--\n".join([f"Attachment: {text}" for text in attachment_texts])
summary = await summarize.summarize_text(email_content)
await tts.local_tts(text_content = summary, speed = 1.1, voice = DEFAULT_VOICE, podcast = podcast, output_path = tts_path)
if podcast:
if PODCAST_DIR.exists():
tts.copy_to_podcast_dir(tts_path)
else:
ERR(f"PODCAST_DIR does not exist: {PODCAST_DIR}")
save_email_as_markdown(this_email, summary, md_path, tts_relative)
else:
save_email_as_markdown(this_email, None, md_path, None)
if auto_respond and should_auto_respond(this_email):
DEBUG(f"Auto-responding to {this_email.subject}")
auto_response_subject = 'Auto-Response Re:' + this_email.subject
auto_response_body = await generate_auto_response_body(this_email)
DEBUG(f"Auto-response: {auto_response_body}")
await send_auto_response(this_email.sender, auto_response_subject, auto_response_body)
inbox.mark_seen(uid)
await asyncio.sleep(30)
except Exception as e:
ERR(f"An error occurred: {e}")
await asyncio.sleep(30)
def save_email_as_markdown(email: EmailModel, summary: str, md_path: Path, tts_path: Path):
    '''
    Saves an email as a markdown file at the given path.
    Args:
        email (EmailModel): The email object containing email details.
        summary (str): The summary of the email, if any.
        md_path (Path): Destination path for the markdown file.
        tts_path (Path): Vault-relative path to the text-to-speech audio file, if any.
    '''
    summary = prefix_lines(summary, '> ') if summary else ''
# Create the markdown content
markdown_content = f'''---
date: {email.datetime_received.strftime('%Y-%m-%d')}
tags:
- email
---
| | | |
| --: | :--: | :--: |
| *received* | **{email.datetime_received.strftime('%B %d, %Y at %H:%M:%S %Z')}** | |
| *from* | **[[{email.sender}]]** | |
| *to* | {', '.join([f'**[[{recipient}]]**' for recipient in email.recipients])} | |
| *subject* | **{email.subject}** | |
'''
if summary:
markdown_content += f'''
> [!summary] Summary
> {summary}
'''
if tts_path:
markdown_content += f'''
![[{tts_path}]]
'''
markdown_content += f'''
---
{email.body}
'''
with open(md_path, 'w', encoding='utf-8') as md_file:
md_file.write(markdown_content)
DEBUG(f"Saved markdown to {md_path}")
AUTORESPONSE_SYS = "You are a helpful AI assistant that generates personalized auto-response messages to incoming emails."
async def generate_auto_response_body(e: EmailModel, response_style: str = "professional") -> str:
age = dt_datetime.now(TZ) - e.datetime_received
prompt = f'''
Please generate a personalized auto-response to the following email. The email is from {e.sender} and was sent {age} ago with the subject line "{e.subject}." You are auto-responding on behalf of {USER_FULLNAME}, who is described by the following short bio (strictly for your context -- do not recite this in the response): "{USER_BIO}." {USER_FULLNAME} is unable to respond himself, because {AUTORESPONSE_CONTEXT}. Everything from here to ~~//END//~~ is the email body.
{e.body}
~~//END//~~
Keep your auto-response {response_style} and to the point, but do aim to make it responsive specifically to the sender's inquiry.
'''
try:
response = await llm.query_ollama(prompt, AUTORESPONSE_SYS, 400)
return response
except Exception as e:
ERR(f"Error generating auto-response: {str(e)}")
return "Thank you for your email. Unfortunately, an error occurred while generating the auto-response. We apologize for any inconvenience."
async def send_auto_response(to_email, subject, body):
try:
message = MIMEMultipart()
message['From'] = IMAP.email # smtp_username
message['To'] = to_email
message['Subject'] = subject
message.attach(MIMEText(body, 'plain'))
# DEBUG(f"Attempting to send auto_response to {to_email} concerning {subject}. We will use {IMAP.host}:{IMAP.smtp_port}, un: {IMAP.email}, pw: {IMAP.password}")
try:
DEBUG(f"Initiating attempt to send auto-response via SMTP at {IMAP.host}:{IMAP.smtp_port}...")
context = ssl._create_unverified_context()
with SMTP_SSL(IMAP.host, IMAP.smtp_port, context=context) as server:
server.login(IMAP.email, IMAP.password)
DEBUG(f"Successfully logged in to {IMAP.host} at {IMAP.smtp_port} as {IMAP.email}. Attempting to send email now.")
server.send_message(message)
INFO(f"Auto-response sent to {to_email} concerning {subject}")
except Exception as e:
ERR(f"Failed to send auto-response email to {to_email}: {e}")
raise e
except Exception as e:
ERR(f"Error in preparing/sending auto-response: {e}")
raise e
def should_auto_respond(email: EmailModel) -> bool:
def matches_list(item: str, email: EmailModel) -> bool:
if '@' in item:
if item in email.sender:
return True
else:
if item.lower() in email.subject.lower() or item.lower() in email.body.lower():
return True
return False
if AUTORESPONSE_WHITELIST:
for item in AUTORESPONSE_WHITELIST:
if matches_list(item, email):
if AUTORESPONSE_BLACKLIST:
for blacklist_item in AUTORESPONSE_BLACKLIST:
if matches_list(blacklist_item, email):
return False
return True
return False
else:
if AUTORESPONSE_BLACKLIST:
for item in AUTORESPONSE_BLACKLIST:
if matches_list(item, email):
return False
return True
@email.on_event("startup")
async def startup_event():
asyncio.create_task(process_unread_emails())

66
sijapi/routers/health.py Normal file

@ -0,0 +1,66 @@
'''
Health check module. /health returns `'status': 'ok'`, /id returns TS_ID, /routers responds with a list of the active routers, /ip responds with the device's local IP, /ts_ip responds with its tailnet IP, and /wan_ip responds with WAN IP.
Depends on:
TS_ID, ROUTERS, LOGGER, SUBNET_BROADCAST
'''
import os
import httpx
import socket
from fastapi import APIRouter
from tailscale import Tailscale
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import TS_ID, ROUTERS, SUBNET_BROADCAST
health = APIRouter(tags=["public", "trusted", "private"])
@health.get("/health")
def get_health():
return {"status": "ok"}
@health.get("/id")
def get_health() -> str:
return TS_ID
@health.get("/routers")
def get_routers() -> str:
listrouters = ", ".join(ROUTERS)
return listrouters
@health.get("/ip")
def get_local_ip():
"""Get the server's local IP address."""
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
s.connect((f'{SUBNET_BROADCAST}', 1))
IP = s.getsockname()[0]
except Exception:
IP = '127.0.0.1'
finally:
s.close()
return IP
@health.get("/wan_ip")
async def get_wan_ip():
"""Get the WAN IP address using Mullvad's API."""
async with httpx.AsyncClient() as client:
try:
response = await client.get('https://am.i.mullvad.net/json')
response.raise_for_status()
wan_info = response.json()
return wan_info.get('ip', 'Unavailable')
except Exception as e:
WARN(f"Error fetching WAN IP: {e}")
return "Unavailable"
@health.get("/ts_ip")
async def get_tailscale_ip():
"""Get the Tailscale IP address."""
tailnet = os.getenv("TAILNET")
api_key = os.getenv("TAILSCALE_API_KEY")
async with Tailscale(tailnet=tailnet, api_key=api_key) as tailscale:
devices = await tailscale.devices()
if devices:
# Assuming you want the IP of the first device in the list
return devices[0]['addresses'][0]
else:
return "No devices found"

358
sijapi/routers/hooks.py Normal file

@ -0,0 +1,358 @@
'''
Webhook module for specific use cases.
Depends on:
LOGGER, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, MAC_UN, MAC_PW, MAC_ID, TS_TAILNET
'''
from fastapi import APIRouter, Request, BackgroundTasks, HTTPException, status
from fastapi.responses import JSONResponse, RedirectResponse
import httpx
import json
from pathlib import Path
import asyncio
from datetime import datetime
import os, io
from PyPDF2 import PdfReader
import aiohttp
import paramiko
import time
import subprocess
from pydantic import BaseModel
from typing import List, Optional
import requests
import base64
from hashlib import sha256
# from O365 import Account, FileSystemTokenBackend
from typing import List
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, MAC_UN, MAC_PW, MAC_ID, TS_TAILNET
hooks = APIRouter()
with open(CASETABLE_PATH, 'r') as file:
CASETABLE = json.load(file)
class WidgetUpdate(BaseModel):
text: Optional[str] = None
progress: Optional[str] = None
icon: Optional[str] = None
color: Optional[str] = None
url: Optional[str] = None
shortcut: Optional[str] = None
graph: Optional[str] = None
@hooks.get("/health_check")
def hook_health():
shellfish_health_check()
@hooks.post("/update_widget")
def hook_widget_update(update: WidgetUpdate):
shellfish_update_widget(update)
@hooks.get("/alert")
async def hook_alert(request: Request):
alert = request.query_params.get('alert')
if not alert:
raise HTTPException(status_code=400, detail='No alert provided.')
return await notify(alert)
@hooks.post("/alert/cd")
async def hook_changedetection(webhook_data: dict):
body = webhook_data.get("body", {})
message = body.get("message", "")
if message and any(word in message.split() for word in ["SPI", "sierra", "pacific"]):
filename = ALERTS_DIR / f"alert_{int(time.time())}.json"
filename.write_text(json.dumps(webhook_data, indent=4))
        await notify(message)
return {"status": "received"}
@hooks.post("/cl/search")
async def hook_cl_search(request: Request, background_tasks: BackgroundTasks):
client_ip = request.client.host
DEBUG(f"Received request from IP: {client_ip}")
data = await request.json()
payload = data['payload']
results = data['payload']['results']
# Save the payload data
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_search.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
background_tasks.add_task(cl_search_process_result, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
@hooks.post("/cl/docket")
async def hook_cl_docket(request: Request):
client_ip = request.client.host
DEBUG(f"Received request from IP: {client_ip}")
data = await request.json()
await cl_docket(data, client_ip)
async def notify(alert: str):
try:
await notify_shellfish(alert)
if TS_ID == MAC_ID:
await notify_local(alert)
else:
await notify_remote(f"{MAC_ID}.{TS_TAILNET}.net", alert, MAC_UN, MAC_PW)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to send alert: {str(e)}")
return {"message": alert}
async def notify_local(message: str):
await asyncio.to_thread(os.system, f'osascript -e \'display notification "{message}" with title "Notification Title"\'')
# Asynchronous remote notification using paramiko SSH
async def notify_remote(host: str, message: str, username: str = None, password: str = None, key_filename: str = None):
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
connect_kwargs = {'hostname': host, 'username': username}
if key_filename:
connect_kwargs['key_filename'] = key_filename
else:
connect_kwargs['password'] = password
await asyncio.to_thread(ssh.connect, **connect_kwargs)
await asyncio.to_thread(ssh.exec_command, f'osascript -e \'display notification "{message}" with title "Notification Title"\'')
ssh.close()
async def notify_shellfish(alert: str):
key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b"
user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm"
iv = "ab5bbeb426015da7eedcee8bee3dffb7"
plain = "Secure ShellFish Notify 2.0\n" + base64.b64encode(alert.encode()).decode() + "\n"
openssl_command = [
"openssl", "enc", "-aes-256-cbc", "-base64", "-K", key, "-iv", iv
]
process = await asyncio.to_thread(subprocess.Popen, openssl_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = await asyncio.to_thread(process.communicate, plain.encode())
if process.returncode != 0:
raise Exception(f"OpenSSL encryption failed: {stderr.decode()}")
base64_encoded = stdout.decode().strip()
url = f"https://secureshellfish.app/push/?user={user}&mutable"
headers = {"Content-Type": "text/plain"}
async with aiohttp.ClientSession() as session:
async with session.post(url, headers=headers, data=base64_encoded) as response:
if response.status != 200:
raise Exception(f"Failed to send notification: {response.status_code}")
## SHELLFISH ##
def shellfish_health_check():
addresses = [
"https://api.sij.ai/health",
"http://100.64.64.20:4444/health",
"http://100.64.64.30:4444/health",
"http://100.64.64.11:4444/health",
"http://100.64.64.15:4444/health"
]
results = []
up_count = 0
for address in addresses:
try:
response = requests.get(address)
if response.status_code == 200:
results.append(f"{address} is up")
up_count += 1
else:
results.append(f"{address} returned status code {response.status_code}")
except requests.exceptions.RequestException:
results.append(f"{address} is down")
# Generate a simple text-based graph
graph = '|' * up_count + '.' * (len(addresses) - up_count)
text_update = "\n".join(results)
widget_command = ["widget", "--text", text_update, "--text", f"Graph: {graph}", "--icon", "network"]
output = shellfish_run_widget_command(widget_command)
return {"output": output, "graph": graph}
def shellfish_update_widget(update: WidgetUpdate):
widget_command = ["widget"]
if update.text:
widget_command.extend(["--text", update.text])
if update.progress:
widget_command.extend(["--progress", update.progress])
if update.icon:
widget_command.extend(["--icon", update.icon])
if update.color:
widget_command.extend(["--color", update.color])
if update.url:
widget_command.extend(["--url", update.url])
if update.shortcut:
widget_command.extend(["--shortcut", update.shortcut])
if update.graph:
widget_command.extend(["--text", update.graph])
output = shellfish_run_widget_command(widget_command)
return {"output": output}
def shellfish_run_widget_command(args: List[str]):
result = subprocess.run(args, capture_output=True, text=True)  # shell=True with an argv list would drop every argument after the first
if result.returncode != 0:
raise HTTPException(status_code=500, detail=result.stderr)
return result.stdout
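# A minimal usage sketch, assuming WidgetUpdate is the pydantic model this module
# imports elsewhere (its fields are inferred from the attribute access above):
#
#   update = WidgetUpdate(text="api.sij.ai is up", icon="network", color="green")
#   shellfish_update_widget(update)
#
# which shells out to the Secure ShellFish `widget` helper with the matching flags.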
### COURTLISTENER FUNCTIONS ###
async def cl_docket(data, client_ip, background_tasks: BackgroundTasks):
payload = data['payload']
results = data['payload']['results']
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_docket.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
background_tasks.add_task(cl_docket_process, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
async def cl_docket_process(result):
async with httpx.AsyncClient() as session:
await cl_docket_process_result(result, session)
async def cl_docket_process_result(result, session):
docket = str(result.get('docket'))
case_code, case_shortname = cl_case_details(docket)
date_filed = result.get('date_filed', 'No Date Filed')
try:
date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d')
except ValueError:
date_filed_formatted = 'NoDateFiled'
# Fetching court docket information from the API
url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status == 200:
DEBUG(f"Fetching CourtListener docket information for {docket}...")
data = await response.json()
court_docket = data['results'][0]['docket_number_core']
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
case_name = data['results'][0]['case_name']
DEBUG(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
else:
DEBUG("Failed to fetch data from CourtListener API.")
court_docket = 'NoCourtDocket'
case_name = 'NoCaseName'
for document in result.get('recap_documents', []):
filepath_ia = document.get('filepath_ia')
filepath_local = document.get('filepath_local')
if filepath_ia:
file_url = filepath_ia
DEBUG(f"Found IA file at {file_url}.")
elif filepath_local:
file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
DEBUG(f"Found local file at {file_url}.")
else:
DEBUG(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
continue
document_number = document.get('document_number', 'NoDocumentNumber')
description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_")
description = description[:50] # Truncate description
# case_shortname = case_name # TEMPORARY OVERRIDE
file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await cl_download_file(file_url, target_path, session)
DEBUG(f"Downloaded {file_name} to {target_path}")
def cl_case_details(docket):
case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"})
case_code = case_info.get("code")
short_name = case_info.get("shortname")
return case_code, short_name
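# CASETABLE is expected to map CourtListener docket IDs to local case metadata,
# roughly shaped like this (values illustrative only):
#
#   {
#       "66894614": {"code": "001", "shortname": "Smith_v_Jones"}
#   }
#
# Unknown dockets fall back to code "000" and shortname "UNKNOWN" above.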
async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
}
# NOTE: the passed-in session is currently unused; an aiohttp session is always opened here
async with aiohttp.ClientSession() as session:
DEBUG(f"Attempting to download {url} to {path}.")
try:
async with session.get(url, headers=headers, allow_redirects=True) as response:
if response.status == 403:
ERR(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
return
response.raise_for_status()
# Check if the response content type is a PDF
content_type = response.headers.get('Content-Type', '')
if not content_type.startswith('application/pdf'):  # tolerate charset suffixes
ERR(f"Invalid content type: {content_type}. Skipping download.")
return
# Create an in-memory buffer to store the downloaded content
buffer = io.BytesIO()
async for chunk in response.content.iter_chunked(1024):
buffer.write(chunk)
# Reset the buffer position to the beginning
buffer.seek(0)
# Validate the downloaded PDF content
try:
PdfReader(buffer)
except Exception as e:
ERR(f"Invalid PDF content: {str(e)}. Skipping download.")
return
# If the PDF is valid, write the content to the file on disk
path.parent.mkdir(parents=True, exist_ok=True)
with path.open('wb') as file:
file.write(buffer.getvalue())
except Exception as e:
ERR(f"Error downloading file: {str(e)}")
async def cl_search_process_result(result):
async with httpx.AsyncClient() as session:
download_url = result.get('download_url')
court_id = result.get('court_id')
case_name_short = result.get('caseNameShort')
case_name = result.get('caseName')
DEBUG(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")
court_folder = court_id
if case_name_short:
case_folder = case_name_short
else:
case_folder = case_name
file_name = download_url.split('/')[-1]
target_path = Path(COURTLISTENER_SEARCH_DIR) / court_folder / case_folder / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await cl_download_file(download_url, target_path, session)
DEBUG(f"Downloaded {file_name} to {target_path}")

973
sijapi/routers/ig.py Normal file

@@ -0,0 +1,973 @@
'''
IN DEVELOPMENT: Instagram AI bot module.
'''
from fastapi import APIRouter, UploadFile
import os
import io
from io import BytesIO
import base64
import copy
import re
import jwt
import json
from tqdm import tqdm
import pyotp
import time
import pytz
import requests
import tempfile
import random
import subprocess
import urllib.request
import uuid
from time import sleep
from datetime import timedelta, datetime as date
from PIL import Image
from pydantic import BaseModel
from typing import Dict, List, Optional
import instagrapi
from instagrapi import Client as igClient
from instagrapi.types import UserShort
from urllib.parse import urlparse
from instagrapi.exceptions import LoginRequired as ClientLoginRequiredError
from ollama import Client as oLlama
from openai import OpenAI
from sd import sd
from dotenv import load_dotenv
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, BASE_DIR, COMFYUI_DIR
ig = APIRouter()
class IG_Request(BaseModel):
file: Optional[UploadFile] = None # upload a particular file to Instagram
profile: Optional[str] = None # specify the profile account to use (uses the shortnames defined by the profile folders and the config file)
local_only: Optional[bool] = False # overrides all other settings to ensure images are generated locally and stay local
openai: Optional[str] = None # OpenAI API key; if included, will rely on it for DALL-E, GPT-4, and GPT-4-Vision unless otherwise overridden
llm: Optional[str] = "llama3" # if a valid OpenAI model name is provided, it will be used; otherwise it will attempt to match to an Ollama model (if one exists)
i2t: Optional[str] = "llava" # set to GPT-4-Vision to use the OpenAI image-2-text model, otherwise this will attempt to match to a vision-capable Ollama model
t2i: Optional[str] = None # set to DALL-E to use the OpenAI model, or use it to override the StableDiffusion workflow that's otherwise selected. Leave blank to use defaults per the config file
ig_post: Optional[str] = True # if given a value, will use this as the category of post; if given no value, will use all categories unless ig_comment_only is enabled
ig_comment: Optional[str] = None # if given a value, will use this as the category of comment; if given no value, will use all categories unless ig_post_only is enabled
ig_comment_user: Optional[str] = None # target a particular user for comments
ig_comment_url: Optional[str] = None # target a particular ig url for comments
ghost_post: Optional[bool] = True # enable posting to Ghost
sleep_short: Optional[int] = 5 # average duration of short intervals (a few seconds is adequate; this is to simulate doomscrolling latency)
sleep_long: Optional[int] = 180 # average duration of long intervals (this should be about a minute at least; it simulates the time it takes to write a comment or prepare a post)
IG_PROFILE = os.getenv("IG_PROFILE")
IG_SHORT_SLEEP = int(os.getenv("IG_SHORT_SLEEP", 5))
IG_LONG_SLEEP = int(os.getenv("IG_LONG_SLEEP", 180))
IG_POST_GHOST = os.getenv("IG_POST_GHOST")
IG_VISION_LLM = os.getenv("IG_VISION_LLM")
IG_PROMPT_LLM = os.getenv("IG_PROMPT_LLM")
IG_IMG_GEN = os.getenv("IG_IMG_GEN", "ComfyUI")
IG_OUTPUT_PLATFORMS = os.getenv("IG_OUTPUT_PLATFORMS", "ig,ghost,obsidian").split(',')
SD_WORKFLOWS_DIR = os.path.join(COMFYUI_DIR, 'workflows')
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output'
IG_PROFILES_DIR = os.path.join(BASE_DIR, 'profiles')
IG_PROFILE_DIR = os.path.join(IG_PROFILES_DIR, IG_PROFILE)
IG_IMAGES_DIR = os.path.join(IG_PROFILE_DIR, 'images')
IG_PROFILE_CONFIG_PATH = os.path.join(IG_PROFILE_DIR, 'config.json')
IG_VIEWED_IMAGES_DIR = os.path.join(IG_PROFILE_DIR, 'downloads')
with open(IG_PROFILE_CONFIG_PATH, 'r') as config_file:
PROFILE_CONFIG = json.load(config_file)
os.makedirs(IG_IMAGES_DIR, exist_ok=True)
OPENAI_API_KEY=PROFILE_CONFIG.get("openai_key")
###################
### VALIDATION ###
##################
# NOTE: leftover validation from the CLI version of this module; `args` is not
# defined in this router, so these checks are disabled until ported to IG_Request:
# if args.profile and args.posttype and not args.custompost and not args.posttype in PROFILE_CONFIG["posts"]:
#     print("ERROR: NO SUCH POST TYPE IS AVAILABLE FOR THIS PROFILE.")
# if args.profile and args.commenttype and not args.commenttype in PROFILE_CONFIG["comments"]:
#     print("ERROR: NO SUCH COMMENT TYPE IS AVAILABLE FOR THIS PROFILE.")
####################
### CLIENT SETUP ###
####################
cl = igClient(request_timeout=1)
IMG_GEN = OpenAI(api_key=OPENAI_API_KEY)
IMG_MODEL = "dall-e-3"
COMFYUI_URL = "http://localhost:8188"
CLIENT_ID = str(uuid.uuid4())
###############################
### INSTAGRAM & GHOST SETUP ###
###############################
IG_USERNAME = PROFILE_CONFIG.get("ig_name")
IG_PASSWORD = PROFILE_CONFIG.get("ig_pass")
IG_SECRET_KEY = PROFILE_CONFIG.get("ig_2fa_secret")
IG_SESSION_PATH = os.path.join(IG_PROFILE_DIR, 'credentials.json')
GHOST_API_URL=PROFILE_CONFIG.get("ghost_admin_url")
GHOST_API_KEY=PROFILE_CONFIG.get("ghost_admin_api_key")
GHOST_CONTENT_KEY=PROFILE_CONFIG.get("ghost_content_key")
########################
### LLM PROMPT SETUP ###
########################
IMG_PROMPT_SYS = PROFILE_CONFIG.get("img_prompt_sys")
IMG_DESCRIPTION_SYS = PROFILE_CONFIG.get("img_description_sys")
COMMENT_PROMPT_SYS = PROFILE_CONFIG.get("img_comment_sys")
HASHTAGS = PROFILE_CONFIG.get("preferred_hashtags", [])
IMAGE_URL = None  # was args.image_url in the CLI version of this module
rollover_time = 1702605780
COMPLETED_MEDIA_LOG = os.path.join(IG_PROFILE_DIR, 'completed-media.txt')
TOTP = pyotp.TOTP(IG_SECRET_KEY)
SHORT = IG_SHORT_SLEEP  # was args.shortsleep in the CLI version
LONG = IG_LONG_SLEEP  # was args.longsleep in the CLI version
def follow_by_username(username) -> bool:
"""
Follow a user, return true if successful false if not.
"""
userid = cl.user_id_from_username(username)
sleep(SHORT)
return cl.user_follow(userid)
def unfollow_by_username(username) -> bool:
"""
Unfollow a user, return true if successful false if not.
"""
userid = cl.user_id_from_username(username)
sleep(SHORT)
return cl.user_unfollow(userid)
def get_poster_of_post(shortcode):
media_info = cl.media_info_by_shortcode(shortcode)
poster_username = media_info.user.username
return(poster_username)
def get_followers(amount: int = 0) -> Dict[int, UserShort]:
"""
Get followers, return Dict of user_id and User object
"""
return cl.user_followers(cl.user_id, amount=amount)
def get_followers_usernames(amount: int = 0) -> List[str]:
"""
Get bot's followers usernames, return List of usernames
"""
followers = cl.user_followers(cl.user_id, amount=amount)
sleep(SHORT)
return [user.username for user in followers.values()]
def get_following(amount: int = 0) -> Dict[int, UserShort]:
"""
Get bot's followed users, return Dict of user_id and User object
"""
sleep(SHORT)
return cl.user_following(cl.user_id, amount=amount)
def get_user_media(username, amount=30):
"""
Fetch recent media for a given username, return List of medias
"""
DEBUG(f"Fetching recent media for {username}...")
user_id = cl.user_id_from_username(username)
medias = cl.user_medias(user_id, amount)
final_medias = []
for media in medias:
sleep(SHORT)
if media.media_type == 1:
final_medias.append(media)
return final_medias
def get_user_image_urls(username, amount=30) -> List[str]:
"""
Fetch recent media URLs for a given username, return List of media URLs
"""
DEBUG(f"Fetching recent media URLs for {username}...")
user_id = cl.user_id_from_username(username)
medias = cl.user_medias(user_id, amount)
urls = []
for media in medias:
sleep(SHORT)
if media.media_type == 1 and media.thumbnail_url:
urls.append(media.thumbnail_url)
return urls
def is_valid_url(url):
try:
result = urlparse(url)
return all([result.scheme, result.netloc])
except Exception:
return False
def get_random_follower():
followers = cl.get_followers_usernames()
sleep(SHORT)
return random.choice(followers)
def get_medias_by_hashtag(hashtag: str, days_ago_max: int = 14, ht_type: str = None, amount: int = 10):  # amount default was args.count in the CLI version
if not ht_type:
ht_type = "recent"  # was args.commentmode in the CLI version
DEBUG(f"Fetching {ht_type} media for hashtag: {hashtag}")
ht_medias = []
while True:
sleep(SHORT)
if ht_type == "top":
ht_medias.extend(cl.hashtag_medias_top(name=hashtag, amount=amount*10))
elif ht_type == "recent":
ht_medias.extend(cl.hashtag_medias_recent(name=hashtag, amount=amount*10))
filtered_medias = filter_medias(ht_medias, days_ago_max=days_ago_max)
DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(filtered_medias)}")
if len(filtered_medias) >= amount:
DEBUG(f"Desired amount of {amount} filtered media reached.")
break
return filtered_medias
def get_medias_from_all_hashtags(days_ago_max=14, ht_type: str = None, amount: int = 10):  # amount default was args.count in the CLI version
if not ht_type:
ht_type = "recent"  # was args.commentmode in the CLI version
DEBUG(f"Fetching {ht_type} media.")
filtered_medias = []
while len(filtered_medias) < amount:
hashtag = random.choice(HASHTAGS)
DEBUG(f"Using hashtag: {hashtag}")
fetched_medias = []
sleep(SHORT)
if ht_type == "top":
fetched_medias = cl.hashtag_medias_top(name=hashtag, amount=50) # Fetch a large batch to filter from
elif ht_type == "recent":
fetched_medias = cl.hashtag_medias_recent(name=hashtag, amount=50) # Same for recent
current_filtered_medias = filter_medias(fetched_medias, days_ago_max=days_ago_max)
filtered_medias.extend(current_filtered_medias)
DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(current_filtered_medias)}")
# Trim the list if we've collected more than needed
if len(filtered_medias) > amount:
filtered_medias = filtered_medias[:amount]
DEBUG(f"Desired amount of {amount} filtered media reached.")
break
else:
DEBUG(f"Total filtered media count so far: {len(filtered_medias)}")
return filtered_medias
def filter_medias(
medias: List,
like_count_min=None,
like_count_max=None,
comment_count_min=None,
comment_count_max=None,
days_ago_max=None,
):
# Adjust to use your preferred timezone, for example, UTC
days_back = date.now(pytz.utc) - timedelta(days=days_ago_max) if days_ago_max else None
return [
media for media in medias
if (
(like_count_min is None or media.like_count >= like_count_min) and
(like_count_max is None or media.like_count <= like_count_max) and
(comment_count_min is None or media.comment_count >= comment_count_min) and
(comment_count_max is None or media.comment_count <= comment_count_max) and
(days_ago_max is None or (media.taken_at and media.taken_at > days_back)) and not
check_media_in_completed_lists(media)
)
]
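# Example usage (thresholds illustrative): keep only posts from the last week with
# modest engagement, excluding anything already logged as completed:
#
#   recent = cl.hashtag_medias_recent(name="art", amount=50)
#   fresh = filter_medias(recent, comment_count_max=10, days_ago_max=7)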
def add_media_to_completed_lists(media):
"""
Add a media to the completed lists after interacting with it.
"""
with open(COMPLETED_MEDIA_LOG, 'a') as file:
file.write(f"{str(media.pk)}\n")
def check_media_in_completed_lists(media):
"""
Check if a media is in the completed lists.
"""
with open(COMPLETED_MEDIA_LOG, 'r') as file:
completed_media = file.read().splitlines()
return str(media.pk) in completed_media
def download_and_resize_image(url: str, download_path: str = None, max_dimension: int = 1200) -> str:
if not isinstance(url, str):
url = str(url)
parsed_url = urlparse(url)
if not download_path or not os.path.isdir(os.path.dirname(download_path)):
_, temp_file_extension = os.path.splitext(parsed_url.path)
if not temp_file_extension:
temp_file_extension = ".jpg" # Default extension if none is found
download_path = tempfile.mktemp(suffix=temp_file_extension, prefix="download_")
if url and parsed_url.scheme and parsed_url.netloc:
try:
os.makedirs(os.path.dirname(download_path), exist_ok=True)
with requests.get(url) as response:
response.raise_for_status() # Raises an HTTPError if the response was an error
image = Image.open(BytesIO(response.content))
# Resize the image, preserving aspect ratio
if max(image.size) > max_dimension:
image.thumbnail((max_dimension, max_dimension))
# Save the image, preserving the original format if possible
image_format = image.format if image.format else "JPEG"  # PIL expects "JPEG", not "jpg"
image.save(download_path, image_format)
return download_path
except Exception as e:
# Handle or log the error as needed
DEBUG(f"Error downloading or resizing image: {e}")
return None
def comment_on_user_media(user: str, comment_type: str = "default", amount=5):
"""
Comment on a user's media.
"""
comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr']
medias = get_user_media(user, amount)
for media in medias:
if not check_media_in_completed_lists(media):
sleep(SHORT)
if media.thumbnail_url and is_valid_url(media.thumbnail_url):
media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
if media_path is not None:
encoded_media = encode_image_to_base64(media_path)
comment_text = llava(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)
if comment_text:
cl.media_comment(media.pk, comment_text)
DEBUG(f"Commented on media: {media.pk}")
else:
DEBUG(f"Failed to generate comment for media: {media.pk}")
add_media_to_completed_lists(media)
sleep(SHORT)
else:
DEBUG(f"We received a nonetype! {media_path}")
else:
DEBUG(f"URL for {media.pk} disappeared it seems...")
else:
DEBUG(f"Media already interacted with: {media.pk}")
def comment_on_hashtagged_media(comment_type: str = "default", amount=3, hashtag: str = None):  # default was args.commenttype in the CLI version
"""
Comment on a hashtag's media.
"""
if not hashtag:
hashtag = random.choice(PROFILE_CONFIG['comments'][comment_type]['hashtags'])
medias = get_medias_by_hashtag(hashtag=hashtag, days_ago_max=7, amount=amount)
for media in medias:
if not check_media_in_completed_lists(media):
media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
comment_text = None
if media_path and os.path.exists(media_path):
encoded_media = encode_image_to_base64(media_path)
comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr'] + " For reference, here is the description that was posted with this image: " + media.caption_text
comment_text = llava(encoded_media, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)
if (PROFILE_CONFIG['comments'][comment_type]['sentiment'] == "positive") and False:  # liking is deliberately disabled for now
try:
like_result = cl.media_like(media)
if like_result:
DEBUG(f"Liked media: https://instagram.com/p/{media.pk}/")
except instagrapi.exceptions.FeedbackRequired as e:
DEBUG(f"Cannot like media {media.pk}: {str(e)}")
if comment_text:
try:
cl.media_comment(media.pk, comment_text)
DEBUG(f"Commented on media: https://instagram.com/p/{media.pk}/")
except instagrapi.exceptions.FeedbackRequired as e:
DEBUG(f"Cannot comment on media {media.pk}: {str(e)}")
else:
DEBUG(f"Failed to generate comment for media: https://instagram.com/p/{media.pk}")
add_media_to_completed_lists(media)
sleep(SHORT)
else:
DEBUG(f"Media already interacted with: {media.pk}")
def comment_on_specific_media(media_url, comment_type: str = "default"):
"""
Comment on a specific media given its URL.
"""
media_id = cl.media_pk_from_url(media_url)
sleep(SHORT)
media = cl.media_info(media_id)
sleep(SHORT)
media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
encoded_media = encode_image_to_base64(media_path)
comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr'] + " For reference, here is the description that was posted with this image: " + media.caption_text
comment_text = llava(encoded_media, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)
if comment_text:
try:
cl.media_comment(media.pk, comment_text)
DEBUG(f"Commented on specific media: https://instagram.com/p/{media.pk}/")
except instagrapi.exceptions.FeedbackRequired as e:
DEBUG(f"Failed to comment on specific media: https://instagram.com/p/{media.pk}/ due to error: {str(e)}")
else:
DEBUG(f"Failed to generate comment for specific media: https://instagram.com/p/{media.pk}/")
def get_image(status_data, key):
"""Extract the filename and subfolder from the status data and read the file."""
try:
outputs = status_data.get("outputs", {})
images_info = outputs.get(key, {}).get("images", [])
if not images_info:
raise Exception("No images found in the job output.")
image_info = images_info[0] # Assuming the first image is the target
filename = image_info.get("filename")
subfolder = image_info.get("subfolder", "") # Default to empty if not present
file_path = os.path.join(COMFYUI_OUTPUT_DIR, subfolder, filename)
with open(file_path, 'rb') as file:
return file.read()
except KeyError as e:
raise Exception(f"Failed to extract image information due to missing key: {e}")
except FileNotFoundError:
raise Exception(f"File {filename} not found at the expected path {file_path}")
def update_prompt(workflow: dict, post: dict, positive: str, found_key=[None], path=None):
if path is None:
path = []
try:
if isinstance(workflow, dict):
for key, value in workflow.items():
current_path = path + [key]
if isinstance(value, dict):
if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
found_key[0] = key
update_prompt(value, post, positive, found_key, current_path)
elif isinstance(value, list):
# Recursive call with updated path for each item in a list
for index, item in enumerate(value):
update_prompt(item, post, positive, found_key, current_path + [str(index)])
if value == "API_PPrompt":
workflow[key] = post.get(value, "") + positive
DEBUG(f"Updated API_PPrompt to: {workflow[key]}")
elif value == "API_SPrompt":
workflow[key] = post.get(value, "")
DEBUG(f"Updated API_SPrompt to: {workflow[key]}")
elif value == "API_NPrompt":
workflow[key] = post.get(value, "")
DEBUG(f"Updated API_NPrompt to: {workflow[key]}")
elif key == "seed" or key == "noise_seed":
workflow[key] = random.randint(1000000000000, 9999999999999)
DEBUG(f"Updated seed to: {workflow[key]}")
elif (key == "width" or key == "max_width" or key == "scaled_width" or key == "side_length") and (value == 1023 or value == 1025):
# workflow[key] = post.get(value, "")
workflow[key] = post.get("width", 1024)
elif (key == "dimension" or key == "height" or key == "max_height" or key == "scaled_height") and (value == 1023 or value == 1025):
# workflow[key] = post.get(value, "")
workflow[key] = post.get("height", 1024)
except Exception as e:
DEBUG(f"Error in update_prompt at path {' -> '.join(path)}: {e}")
raise
return found_key[0]
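# For context, update_prompt() walks a ComfyUI workflow dict and substitutes string
# placeholders in node inputs. A minimal illustrative fragment (node IDs and wiring
# hypothetical):
#
#   {
#       "6": {"class_type": "CLIPTextEncode",
#             "inputs": {"text": "API_PPrompt", "clip": ["4", 1]}},
#       "9": {"class_type": "SaveImage",
#             "inputs": {"filename_prefix": "API_", "images": ["8", 0]}}
#   }
#
# "API_PPrompt" is replaced with the post's positive prompt plus the generated
# concept, and node "9" is returned as the save-node key for get_image().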
def update_prompt_custom(workflow: dict, API_PPrompt: str, API_SPrompt: str, API_NPrompt: str, found_key=[None], path=None):
if path is None:
path = []
try:
if isinstance(workflow, dict):
for key, value in workflow.items():
current_path = path + [key]
if isinstance(value, dict):
if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
found_key[0] = key
update_prompt_custom(value, API_PPrompt, API_SPrompt, API_NPrompt, found_key, current_path)
elif isinstance(value, list):
# Recursive call with updated path for each item in a list
for index, item in enumerate(value):
update_prompt_custom(item, API_PPrompt, API_SPrompt, API_NPrompt, found_key, current_path + [str(index)])
if value == "API_PPrompt":
workflow[key] = API_PPrompt
DEBUG(f"Updated API_PPrompt to: {workflow[key]}")
elif value == "API_SPrompt":
workflow[key] = API_SPrompt
DEBUG(f"Updated API_SPrompt to: {workflow[key]}")
elif value == "API_NPrompt":
workflow[key] = API_NPrompt
DEBUG(f"Updated API_NPrompt to: {workflow[key]}")
elif key == "seed" or key == "noise_seed":
workflow[key] = random.randint(1000000000000, 9999999999999)
DEBUG(f"Updated seed to: {workflow[key]}")
elif (key == "width" or key == "max_width" or key == "scaled_width") and (value == 1023 or value == 1025):
workflow[key] = 1024
elif (key == "dimension" or key == "height" or key == "max_height" or key == "scaled_height") and (value == 1023 or value == 1025):
workflow[key] = 1024
except Exception as e:
DEBUG(f"Error in update_prompt_custom at path {' -> '.join(path)}: {e}")
raise
return found_key[0]
##################################
### IMAGE GENERATION FUNCTIONS ###
##################################
def image_gen(prompt: str, model: str):
response = IMG_GEN.images.generate(
model=model,
prompt=prompt,
size="1024x1024",
quality="standard",
n=1,
)
image_url = response.data[0].url
image_path = download_and_resize_image(image_url)
return image_path
def queue_prompt(prompt: dict):
response = requests.post(f"{COMFYUI_URL}/prompt", json={"prompt": prompt, "client_id": CLIENT_ID})
if response.status_code == 200:
return response.json().get('prompt_id')
else:
raise Exception(f"Failed to queue prompt. Status code: {response.status_code}, Response body: {response.text}")
def poll_status(prompt_id):
"""Poll the job status until it's complete and return the status data."""
start_time = time.time() # Record the start time
while True:
elapsed_time = int(time.time() - start_time) # Calculate elapsed time in seconds
status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}")
DEBUG(f"Generating {prompt_id}. Elapsed time: {elapsed_time} seconds")
if status_response.status_code != 200:
raise Exception("Failed to get job status")
status_data = status_response.json()
job_data = status_data.get(prompt_id, {})
if job_data.get("status", {}).get("completed", False):
DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.")
return job_data
time.sleep(1)
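# Taken together, the helpers above form the ComfyUI generation pipeline used by
# handle_image_workflow() below:
#
#   prompt_id = queue_prompt(workflow_data)        # submit the workflow graph
#   status_data = poll_status(prompt_id)           # block until the job completes
#   image_data = get_image(status_data, save_key)  # read the rendered image bytes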
################################
### PRIMARY ACTIVE FUNCTIONS ###
################################
def load_post(chosen_post: str = "default"):
if chosen_post in PROFILE_CONFIG['posts']:
post = PROFILE_CONFIG['posts'][chosen_post]
DEBUG(f"Loaded post for {chosen_post}")
else:
DEBUG(f"Unable to load post for {chosen_post}. Choosing a default post.")
chosen_post = choose_post(PROFILE_CONFIG['posts'])
post = PROFILE_CONFIG['posts'][chosen_post]
DEBUG(f"Defaulted to {chosen_post}")
return post
def handle_image_workflow(chosen_post=None):
"""
Orchestrates the workflow from prompt update, image generation, to either saving the image and description locally
or posting to Instagram based on the local flag.
"""
if chosen_post is None:
chosen_post = choose_post(PROFILE_CONFIG['posts'])
post = load_post(chosen_post)
workflow_name = args.workflow if args.workflow else random.choice(post['workflows'])
DEBUG(f"Workflow name: {workflow_name}")
DEBUG(f"Generating image concept for {chosen_post} and {workflow_name} now.")
image_concept = query_ollama(llmPrompt = post['llmPrompt'], max_tokens = 180) if args.local or not args.openai else query_gpt4(llmPrompt = post['llmPrompt'], max_tokens = 180)
DEBUG(f"Image concept for {chosen_post}: {image_concept}")
workflow_data = None
if args.fast:
workflow_data = load_json(None, f"{workflow_name}_fast")
if workflow_data is None:
workflow_data = load_json(None, workflow_name)
if args.dalle and not args.local:
jpg_file_path = image_gen(image_concept, "dall-e-3")
else:
saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
DEBUG(f"Saved file key: {saved_file_key}")
prompt_id = queue_prompt(workflow_data)
DEBUG(f"Prompt ID: {prompt_id}")
status_data = poll_status(prompt_id)
image_data = get_image(status_data, saved_file_key)
if chosen_post == "landscape":
jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 2880, 100)
else:
jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 1440, 90)
image_aftergen(jpg_file_path, chosen_post)
def handle_custom_image(custom_post: str):
"""
Orchestrates the workflow from prompt update, image generation, to either saving the image and description locally
or posting to Instagram based on the local flag.
"""
if args.posttype:
post = load_post(args.posttype)
workflow_name = args.workflow if args.workflow else random.choice(post['workflows'])
else:
workflow_name = args.workflow if args.workflow else "selfie"
post = {
"API_PPrompt": "",
"API_SPrompt": "; (((masterpiece))); (beautiful lighting:1), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
"API_NPrompt": "canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3",
"Vision_Prompt": "Write an upbeat Instagram description with emojis to accompany this selfie!",
"frequency": 2,
"ghost_tags": [
"aigenerated",
"stablediffusion",
"sdxl",
],
}
workflow_data = load_json(None, workflow_name)
system_msg = "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words."
image_concept = query_ollama(system_msg=system_msg, user_msg=custom_post, max_tokens = 180) if args.local or not args.openai else query_gpt4(system_msg=system_msg, user_msg=custom_post, max_tokens = 180)
DEBUG(f"Image concept: {image_concept}")
if args.dalle and not args.local:
jpg_file_path = image_gen(image_concept, "dall-e-3")
else:
saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
DEBUG(f"Saved file key: {saved_file_key}")
prompt_id = queue_prompt(workflow_data)
DEBUG(f"Prompt ID: {prompt_id}")
status_data = poll_status(prompt_id)
image_data = get_image(status_data, saved_file_key)
chosen_post = args.posttype if args.posttype else "custom"
jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 1440, 90)
encoded_string = encode_image_to_base64(jpg_file_path)
vision_prompt = f"Write upbeat Instagram description accompany this image, which was created by AI using the following prompt: {image_concept}"
instagram_description = llava(encoded_string, vision_prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, vision_prompt, 150)
image_aftergen(jpg_file_path, chosen_post)
def image_aftergen(jpg_file_path: str, chosen_post: str = None, post: Dict = None, prompt: str = None):
if chosen_post and not prompt:
prompt = PROFILE_CONFIG['posts'][chosen_post]['Vision_Prompt']
encoded_string = encode_image_to_base64(jpg_file_path)
DEBUG(f"Image successfully encoded from {jpg_file_path}")
instagram_description = llava(encoded_string, prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, prompt, 150)
instagram_description = re.sub(r'^["\'](.*)["\']$', r'\1', instagram_description)
ghost_tags = post['ghost_tags'] if post else PROFILE_CONFIG['posts'][chosen_post]['ghost_tags']
title_prompt = f"Generate a short 3-5 word title for this image, which already includes the following description: {instagram_description}"
# Generate img_title based on the condition provided
img_title = llava(encoded_string, title_prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, title_prompt, 150)
img_title = re.sub(r'^["\'](.*)["\']$', r'\1', img_title)
# Save description to file and upload or save locally
description_filename = jpg_file_path.rsplit('.', 1)[0] + ".txt"
description_path = os.path.join(IG_IMAGES_DIR, description_filename)
with open(description_path, "w") as desc_file:
desc_file.write(instagram_description)
# Initial markdown content creation
markdown_filename = jpg_file_path.rsplit('.', 1)[0] + ".md"
markdown_content = f"""# {img_title}
![{img_title}]({jpg_file_path})
---
{instagram_description}
---
Tags: {', '.join(ghost_tags)}
"""
with open(markdown_filename, "w") as md_file:
md_file.write(markdown_content)
DEBUG(f"Markdown file created at {markdown_filename}")
if args.wallpaper:
change_wallpaper(jpg_file_path)
DEBUG(f"Wallpaper changed.")
if not args.local:
ig_footer = ""
if not args.noig:
post_url = upload_photo(jpg_file_path, instagram_description)
DEBUG(f"Image posted at {post_url}")
ig_footer = f"\n<a href=\"{post_url}\">Instagram link</a>"
if not args.noghost:
ghost_text = f"{instagram_description}"
ghost_url = post_to_ghost(img_title, jpg_file_path, ghost_text, ghost_tags)
DEBUG(f"Ghost post: {ghost_url}\n{ig_footer}")
def choose_post(posts):
total_frequency = sum(posts[post_type]['frequency'] for post_type in posts)
random_choice = random.randint(1, total_frequency)
current_sum = 0
for post_type, post_info in posts.items():
current_sum += post_info['frequency']
if random_choice <= current_sum:
return post_type
def load_json(json_payload, workflow):
if json_payload:
return json.loads(json_payload)
elif workflow:
workflow_path = os.path.join(SD_WORKFLOWS_DIR, f"{workflow}.json" if not workflow.endswith('.json') else workflow)
with open(workflow_path, 'r') as file:
return json.load(file)
else:
raise ValueError("No valid input provided.")
def save_as_jpg(image_data, prompt_id, chosen_post:str = None, max_size=2160, quality=80):
chosen_post = chosen_post if chosen_post else "custom"
filename_png = f"{prompt_id}.png"
category_dir = os.path.join(IG_IMAGES_DIR, chosen_post)
image_path_png = os.path.join(category_dir, filename_png)
try:
# Ensure the directory exists
os.makedirs(category_dir, exist_ok=True)
# Save the raw PNG data to a file
with open(image_path_png, 'wb') as file:
file.write(image_data)
# Open the PNG, resize it, and save it as jpg
with Image.open(image_path_png) as img:
# Resize image if necessary
if max(img.size) > max_size:
ratio = max_size / max(img.size)
new_size = tuple([int(x * ratio) for x in img.size])
img = img.resize(new_size, Image.Resampling.LANCZOS)
# Prepare the path for the converted image
new_file_name = f"{prompt_id}.jpg"
new_file_path = os.path.join(category_dir, new_file_name)
# Convert to jpg and save
img.convert('RGB').save(new_file_path, format='JPEG', quality=quality)
# Optionally, delete the temporary PNG file
os.remove(image_path_png)
return new_file_path
except Exception as e:
DEBUG(f"Error processing image: {e}")
return None
def upload_photo(path, caption, title: str=None):
DEBUG(f"Uploading photo from {path}...")
media = cl.photo_upload(path, caption)
post_url = f"https://www.instagram.com/p/{media.code}/"
return post_url
def format_duration(seconds):
"""Return a string representing the duration in a human-readable format."""
if seconds < 120:
return f"{int(seconds)} sec"
elif seconds < 6400:
return f"{int(seconds // 60)} min"
else:
return f"{seconds / 3600:.2f} hr"
########################
### HELPER FUNCTIONS ###
########################
def change_wallpaper(image_path):
command = """
osascript -e 'tell application "Finder" to set desktop picture to POSIX file "{}"'
""".format(image_path)
subprocess.run(command, shell=True)
def sleep(seconds):
"""Sleep for a random amount of time, approximately the given number of seconds."""
sleepupto(seconds*0.66, seconds*1.5)
def sleepupto(min_seconds, max_seconds=None):
interval = random.uniform(min_seconds if max_seconds is not None else 0, max_seconds if max_seconds is not None else min_seconds)
start_time = time.time()
end_time = start_time + interval
with tqdm(total=interval, desc=f"Sleeping for {format_duration(interval)}", unit=" sec", ncols=75, bar_format='{desc}: {bar} {remaining}') as pbar:
while True:
current_time = time.time()
elapsed_time = current_time - start_time
remaining_time = end_time - current_time
if elapsed_time >= interval:
break
duration = min(1, interval - elapsed_time) # Adjust sleep time to not exceed interval
time.sleep(duration)
pbar.update(duration)
# Update remaining time display
pbar.set_postfix_str(f"{format_duration(remaining_time)} remaining")
########################
### GHOST FUNCTIONS ###
########################
def generate_jwt_token():
key_id, key_secret = GHOST_API_KEY.split(':')
iat = int(date.now().timestamp())
exp = iat + 5 * 60 # Token expiration time set to 5 minutes from now for consistency with the working script
payload = {
'iat': iat,
'exp': exp,
'aud': '/admin/' # Adjusted to match the working script
}
token = jwt.encode(payload, bytes.fromhex(key_secret), algorithm='HS256', headers={'kid': key_id})
return token.decode('utf-8') if isinstance(token, bytes) else token # Ensure the token is decoded to UTF-8 string
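# The Ghost Admin API key is "<key_id>:<hex_secret>"; the resulting short-lived JWT
# is presented on every Admin API call, e.g. (endpoint illustrative):
#
#   headers = {'Authorization': f'Ghost {generate_jwt_token()}'}
#   requests.get(f"{GHOST_API_URL}/site/", headers=headers)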
def post_to_ghost(title, image_path, html_content, ghost_tags):
jwt_token = generate_jwt_token()
ghost_headers = {'Authorization': f'Ghost {jwt_token}'}
# Upload the image to Ghost
with open(image_path, 'rb') as f:
files = {'file': (os.path.basename(image_path), f, 'image/jpeg')}
image_response = requests.post(f"{GHOST_API_URL}/images/upload/", headers=ghost_headers, files=files)
image_response.raise_for_status() # Ensure the request was successful
image_url = image_response.json()['images'][0]['url']
# Prepare the post content
updated_html_content = f'<img src="{image_url}" alt="Image"/><hr/> {html_content}'
mobiledoc = {
"version": "0.3.1",
"atoms": [],
"cards": [["html", {"cardName": "html", "html": updated_html_content}]],
"markups": [],
"sections": [[10, 0]]
}
mobiledoc = json.dumps(mobiledoc)
post_data = {
'posts': [{
'title': title,
'mobiledoc': mobiledoc,
'status': 'published',
'tags': ghost_tags
}]
}
# Create a new post
post_response = requests.post(f"{GHOST_API_URL}/posts/", json=post_data, headers=ghost_headers)
post_response.raise_for_status()
post_url = post_response.json()['posts'][0]['url']
return post_url
########################################################
@ig.post("/ig/flow")
async def ig_flow_endpoint(new_session: bool = False):
current_unix_time = int(date.now().timestamp())
time_since_rollover = current_unix_time - rollover_time
time_remaining = 30 - (time_since_rollover % 30)
if time_remaining < 4:
DEBUG("Too close to end of TOTP counter. Waiting.")
sleepupto(5, 5)
if not new_session and os.path.exists(IG_SESSION_PATH):
cl.load_settings(IG_SESSION_PATH)
DEBUG("Loaded past session.")
elif new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now()):
cl.dump_settings(IG_SESSION_PATH)
DEBUG("Logged in and saved new session.")
else:
raise Exception(f"Failed to login as {IG_USERNAME}.")

484
sijapi/routers/llm.py Normal file

@@ -0,0 +1,484 @@
#routers/llm.py
from fastapi import APIRouter, FastAPI, HTTPException, Request, Response
from fastapi.responses import StreamingResponse, JSONResponse
from datetime import datetime as dt_datetime
from dateutil import parser
from typing import List, Dict, Any, Union
from pydantic import BaseModel, root_validator, ValidationError
import aiofiles
import os
import glob
import chromadb
from openai import OpenAI
import uuid
import json
import base64
import time
import asyncio
from pathlib import Path
import ollama
from ollama import AsyncClient as Ollama, list as OllamaList
from dotenv import load_dotenv
from sijapi import BASE_DIR, DATA_DIR, LOGS_DIR, CONFIG_DIR, LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi.utilities import convert_to_unix_time, sanitize_filename
llm = APIRouter()
# Initialize chromadb client; note this rebinds the OBSIDIAN_CHROMADB_COLLECTION
# name imported above to a freshly created collection
client = chromadb.Client()
OBSIDIAN_CHROMADB_COLLECTION = client.create_collection("obsidian")
# Function to read all markdown files in the folder
def read_markdown_files(folder: Path):
file_paths = glob.glob(os.path.join(folder, "*.md"))
documents = []
for file_path in file_paths:
with open(file_path, 'r', encoding='utf-8') as file:
documents.append(file.read())
return documents, file_paths
# Read markdown files and generate embeddings
documents, file_paths = read_markdown_files(DOC_DIR)
for i, doc in enumerate(documents):
response = ollama.embeddings(model="mxbai-embed-large", prompt=doc)
embedding = response["embedding"]
OBSIDIAN_CHROMADB_COLLECTION.add(
ids=[file_paths[i]],
embeddings=[embedding],
documents=[doc]
)
# Function to retrieve the most relevant document given a prompt
@llm.get("/retrieve_document/{prompt}")
async def retrieve_document(prompt: str):
response = ollama.embeddings(
prompt=prompt,
model="mxbai-embed-large"
)
results = OBSIDIAN_CHROMADB_COLLECTION.query(
query_embeddings=[response["embedding"]],
n_results=1
)
return {"document": results['documents'][0][0]}
# Function to generate a response using RAG
@llm.get("/generate_response/{prompt}")
async def generate_response(prompt: str):
data = await retrieve_document(prompt)
output = ollama.generate(
model="llama2",
prompt=f"Using this data: {data['document']}. Respond to this prompt: {prompt}"
)
return {"response": output['response']}
async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, max_tokens: int = 200):
messages = [{"role": "system", "content": sys},
{"role": "user", "content": usr}]
LLM = Ollama()
response = await LLM.chat(model=DEFAULT_LLM, messages=messages, options={"num_predict": max_tokens})
DEBUG(response)
if "message" in response:
if "content" in response["message"]:
content = response["message"]["content"]
return content
else:
DEBUG("No choices found in response")
return None
@llm.post("/v1/chat/completions")
async def chat_completions(request: Request):
body = await request.json()
timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S%f")
filename = REQUESTS_DIR / f"request_{timestamp}.json"
async with aiofiles.open(filename, mode='w') as file:
await file.write(json.dumps(body, indent=4))
messages = body.get('messages')
if not messages:
raise HTTPException(status_code=400, detail="Message data is required in the request body.")
requested_model = body.get('model', 'default-model')
DEBUG(f"Requested model: {requested_model}")
stream = body.get('stream')
token_limit = body.get('max_tokens') or body.get('num_predict')
# Check if the most recent message contains an image_url
recent_message = messages[-1]
if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
DEBUG("Processing as a vision request")
model = "llava"
DEBUG(f"Using model: {model}")
return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json")
else:
DEBUG("Processing as a standard request")
model = requested_model
DEBUG(f"Using model: {model}")
if stream:
return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json")
else:
response_data = await generate_messages(messages, model)
return JSONResponse(response_data, media_type="application/json")
async def stream_messages(messages: list, model: str = "llama3", num_predict: int = 300):
async with Ollama() as async_client:
try:
index = 0
async for part in async_client.chat(model=model, messages=messages, stream=True, options={'num_predict': num_predict}):
yield "data: " + json.dumps({
"id": "chatcmpl-123",
"object": "chat.completion.chunk",
"created": int(time.time()),
"model": model,
"system_fingerprint": "fp_44709d6fcb",
"choices": [{
"index": index,
"delta": {"role": "assistant", "content": part['message']['content']},
"logprobs": None,
"finish_reason": None if 'finish_reason' not in part else part['finish_reason']
}]
}) + "\n\n"
index += 1
except Exception as e:
yield "data: " + json.dumps({"error": f"Error: {str(e)}"}) + "\n\n"
yield "data: [DONE]\n\n"
async def stream_messages_with_vision(message: dict, model: str, num_predict: int = 300):
async with Ollama() as async_client:
try:
if isinstance(message.get('content'), list):
content = message['content']
for part in content:
if part['type'] == 'image_url' and 'url' in part['image_url']:
image_url = part['image_url']['url']
if image_url.startswith('data:image'):
# Convert base64 to bytes
image_data = base64.b64decode(image_url.split('base64,')[1])
response_generator = await async_client.generate(
model=model,
prompt='explain this image:',
images=[image_data],
stream=True,
options={'num_predict': num_predict}
)
index = 0
async for response in response_generator:
yield "data: " + json.dumps({
"id": "chatcmpl-123",
"object": "chat.completion.chunk",
"created": int(time.time()),
"model": model,
"system_fingerprint": "fp_44709d6fcb",
"choices": [{
"index": index,
"delta": {"role": "assistant", "content": response['response']},
"logprobs": None,
"finish_reason": None if 'finish_reason' not in response else response['finish_reason']
}]
}) + "\n\n"
index += 1
except Exception as e:
yield "data: " + json.dumps({"error": f"Error: {str(e)}"}) + "\n\n"
yield "data: [DONE]\n\n"
def get_appropriate_model(requested_model):
if requested_model == "gpt-4-vision-preview":
return DEFAULT_VISION
elif not is_model_available(requested_model):
return DEFAULT_LLM
else:
return requested_model
def is_vision_request(content):
if isinstance(content, list):
return any(isinstance(msg, dict) and msg.get('type') == 'image_url' for msg in content)
return False
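# is_vision_request() keys off OpenAI-style multimodal message content, e.g.:
#
#   {"role": "user", "content": [
#       {"type": "text", "text": "What is in this image?"},
#       {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}
#   ]}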
@llm.get("/v1/models")
async def get_models():
model_data = OllamaList()
formatted_models = []
for model in model_data['models']:
model_id = model['name'].split(':')[0]
formatted_models.append({
"id": model_id,
"object": "model",
"created": convert_to_unix_time(model['modified_at']),
"owned_by": "sij"
})
return JSONResponse({
"object": "list",
"data": formatted_models
})
async def generate_messages(messages: list, model: str = "llama3"):
async_client = Ollama()
try:
response = await async_client.chat(model=model, messages=messages, stream=False)
return {
"model": model,
"choices": [{
"message": {
"role": "assistant",
"content": response['message']['content']
}
}]
}
except Exception as e:
return {"error": f"Error: {str(e)}"}
def is_model_available(model_name):
model_data = OllamaList()
available_models = [model['name'] for model in model_data['models']]
DEBUG(f"Available models: {available_models}") # Log using the configured LOGGER
matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name]
if len(matching_models) == 1:
DEBUG(f"Unique match found: {matching_models[0]}")
return True
elif len(matching_models) > 1:
WARN(f"Ambiguous match found, models: {matching_models}")
return True
else:
WARN(f"No match found for model: {model_name}")
return False
@llm.options("/chat/completions")
@llm.options("/v1/chat/completions")
async def chat_completions_options(request: Request):
return JSONResponse(
content={
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "To use the chat completions endpoint, make a POST request to /v1/chat/completions with a JSON payload containing the 'messages' array. Each message should have a 'role' (either 'system', 'user', or 'assistant') and 'content' (the message text). You can optionally specify the 'model' to use. The response will be a JSON object containing the generated completions."
},
"finish_reason": "stop"
}
],
"created": int(time.time()),
"id": str(uuid.uuid4()),
"model": DEFAULT_LLM,
"object": "chat.completion.chunk",
},
status_code=200,
headers={
"Accept": "application/json",
"Content-Type": "application/json",
"Allow": "OPTIONS, POST",
},
)
#### EMBEDDINGS
class EmbeddingRequest(BaseModel):
model: str
input: Union[str, List[str], None] = None
prompt: Union[str, List[str], None] = None
@root_validator(pre=True)
def ensure_list(cls, values):
input_value = values.get('input')
prompt_value = values.get('prompt')
if input_value and isinstance(input_value, str):
values['input'] = [input_value]
if prompt_value and isinstance(prompt_value, str):
values['prompt'] = [prompt_value]
if input_value and not prompt_value:
values['prompt'] = values['input']
values['input'] = None
return values
class EmbeddingResponse(BaseModel):
object: str
data: List[Dict[str, Any]]
model: str
usage: Dict[str, int]
@llm.post("/api/embeddings", response_model=EmbeddingResponse)
@llm.post("/v1/embeddings", response_model=EmbeddingResponse)
async def create_embedding(request: EmbeddingRequest):
try:
combined_input = " ".join(request.prompt)
response = ollama.embeddings(model=request.model, prompt=combined_input)
embedding_list = response.get("embedding", [])
data = [{
"object": "embedding",
"index": 0,
"embedding": embedding_list
}]
result = {
"object": "list",
"data": data,
"model": request.model,
"usage": {"prompt_tokens": 5, "total_tokens": 5} # Example token counts
}
return result
except ValidationError as e:
raise HTTPException(status_code=422, detail=e.errors())
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
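# Example request (shape mirrors the OpenAI and Ollama embedding APIs; the
# validator above folds "input" into "prompt" before the Ollama call):
#
#   POST /v1/embeddings
#   {"model": "mxbai-embed-large", "input": "some text to embed"}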
@llm.options("/api/embeddings")
@llm.options("/v1/embeddings")
async def options_embedding():
return JSONResponse(
content={},
headers={
"Allow": "OPTIONS, POST",
"Content-Type": "application/json",
"Access-Control-Allow-Methods": "OPTIONS, POST",
"Access-Control-Allow-Headers": "Content-Type"
}
)
###### PORTED FROM IGBOT, NEEDS TO BE UPDATED FOR THIS ENVIRONMENT AND MADE ASYNC: #####
def query_gpt4(llmPrompt: List = [], system_msg: str = "", user_msg: str = "", max_tokens: int = 150):
messages = llmPrompt if llmPrompt else [
{"role": "system", "content": system_msg},
{"role": "user", "content": user_msg}
]
LLM = OpenAI(api_key=OPENAI_API_KEY)
response = LLM.chat.completions.create(
model="gpt-4",
messages=messages,
max_tokens=max_tokens
)
if hasattr(response, "choices") and response.choices: # Checks if 'choices' attribute exists and is not empty
first_choice = response.choices[0]
if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
return first_choice.message.content
else:
DEBUG("No content attribute in the first choice's message")
DEBUG(f"No content found in message string: {response.choices}")
DEBUG("Trying again!")
return query_gpt4(llmPrompt=messages, max_tokens=max_tokens)
else:
DEBUG(f"No content found in message string: {response}")
return ""
def llava(image_base64, prompt):
VISION_LLM = ollama.Client(host='http://localhost:11434')  # synchronous client; the Ollama alias above is the AsyncClient
response = VISION_LLM.generate(
model = 'llava',
prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
images = [image_base64]
)
DEBUG(response)
return "" if "pass" in response["response"].lower() else response["response"]
def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
VISION_LLM = OpenAI(api_key=OPENAI_API_KEY)
response_1 = VISION_LLM.chat.completions.create(
model="gpt-4-vision-preview",
messages=[
{
"role": "system",
"content": f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt_sys}",
},
{
"role": "user",
"content": [
{"type": "text", "text": f"{prompt_usr}"},
{"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}}
],
}
],
max_tokens=max_tokens,
stream=False
)
if response_1 and response_1.choices:
if len(response_1.choices) > 0:
first_choice = response_1.choices[0]
if first_choice.message and first_choice.message.content:
comment_content = first_choice.message.content
if "PASS" in comment_content:
return ""
DEBUG(f"Generated comment: {comment_content}")
response_2 = VISION_LLM.chat.completions.create(
model="gpt-4-vision-preview",
messages=[
{
"role": "system",
"content": f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt_sys}",
},
{
"role": "user",
"content": [
{"type": "text", "text": f"{prompt_usr}"},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpg;base64,{image_base64}"
},
},
],
},
{
"role": "assistant",
"content": comment_content
},
{
"role": "user",
"content": "Please refine it, and remember to ONLY include the caption or comment, nothing else! That means no preface, no postscript, no notes, no reflections, and not even any acknowledgment of this follow-up message. I need to be able to use your output directly on social media. Do include emojis though."
}
],
max_tokens=max_tokens,
stream=False
)
if response_2 and response_2.choices:
if len(response_2.choices) > 0:
first_choice = response_2.choices[0]
if first_choice.message and first_choice.message.content:
final_content = first_choice.message.content
DEBUG(f"Generated comment: {final_content}")
if "PASS" in final_content:
return ""
else:
return final_content
DEBUG("Vision response did not contain expected data.")
DEBUG(f"Vision response: {response_1}")
time.sleep(15)  # this function is synchronous; a bare asyncio.sleep() would never be awaited
try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
return try_again

524
sijapi/routers/locate.py Normal file

@@ -0,0 +1,524 @@
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import HTMLResponse, JSONResponse
import requests
import json
import traceback
import asyncio
import pytz
import folium
import time as timer
from pathlib import Path
from pydantic import BaseModel
from typing import Optional, Any, Dict, List, Union
from datetime import datetime, timedelta, time, timezone
from sijapi import LOCATION_OVERRIDES, TZ
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi.utilities import get_db_connection, haversine, localize_dt
# from osgeo import gdal
# import elevation
locate = APIRouter()
class Location(BaseModel):
latitude: float
longitude: float
datetime: datetime
elevation: Optional[float] = None
altitude: Optional[float] = None
zip: Optional[str] = None
street: Optional[str] = None
city: Optional[str] = None
state: Optional[str] = None
country: Optional[str] = None
context: Optional[Dict[str, Any]] = None
class Config:
json_encoders = {
datetime: lambda dt: dt.isoformat(),
}
def reverse_geocode(latitude, longitude) -> Dict:
url = f"https://nominatim.openstreetmap.org/reverse?format=json&lat={latitude}&lon={longitude}"
INFO(f"Calling Nominatim API at {url}")
headers = {
'User-Agent': 'sij.law/1.0 (sij@sij.law)', # replace with your app name and email
}
try:
response = requests.get(url, headers=headers)
response.raise_for_status() # Raise an exception for unsuccessful requests
data = response.json()
result = {
"latitude": data.get("lat", ""),
"longitude": data.get("lon", ""),
"class": data.get("class", ""),
"type": data.get("type", ""),
"name": data.get("name", ""),
"display_name": data.get("display_name", ""),
"boundingbox": data.get("boundingbox", [])
}
if "address" in data:
address = data["address"]
result.update({
"amenity": address.get("amenity", ""),
"house_number": address.get("house_number", ""),
"road": address.get("road", ""),
"quarter": address.get("quarter", ""),
"neighbourhood": address.get("neighbourhood", ""),
"suburb": address.get("suburb", ""),
"county": address.get("county", ""),
"city": address.get("city", ""),
"state": address.get("state", ""),
"postcode": address.get("postcode", ""),
"country": address.get("country", ""),
"country_code": address.get("country_code", "")
})
INFO(f"{result}")
return result
except requests.exceptions.RequestException as e:
ERR(f"Error: {e}")
return None
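# Usage sketch (coordinates invented; requires network access to Nominatim):
#
#   info = reverse_geocode(45.5152, -122.6784)
#   if info:
#       print(info["display_name"], info.get("city"), info.get("postcode"))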
## NOT YET IMPLEMENTED
async def geocode(zip_code: Optional[str] = None, latitude: Optional[float] = None, longitude: Optional[float] = None, city: Optional[str] = None, state: Optional[str] = None, country_code: str = 'US') -> Location:
if (latitude is None or longitude is None) and (zip_code is None) and (city is None or state is None):
ERR(f"Must provide sufficient information for geocoding!")
return None
try:
# Establish the database connection
conn = get_db_connection()
# Build the SQL query based on the provided parameters
# NOTE: the $1-$7 placeholder numbering below assumes latitude/longitude are always
# supplied first; other parameter combinations would misnumber the placeholders
# (flagged here since this function is marked not yet implemented)
query = "SELECT id, street, city, state, country, latitude, longitude, zip, elevation, datetime, date, ST_Distance(geom, ST_SetSRID(ST_MakePoint($1, $2), 4326)) AS distance FROM Locations"
conditions = []
params = []
if latitude is not None and longitude is not None:
conditions.append("ST_DWithin(geom, ST_SetSRID(ST_MakePoint($1, $2), 4326), 50000)") # 50 km radius
params.extend([longitude, latitude])
if zip_code:
conditions.append("zip = $3 AND country = $4")
params.extend([zip_code, country_code])
if city and state:
conditions.append("city ILIKE $5 AND state ILIKE $6 AND country = $7")
params.extend([city, state, country_code])
if conditions:
query += " WHERE " + " OR ".join(conditions)
query += " ORDER BY distance LIMIT 1;"
DEBUG(f"Executing query: {query} with params: {params}")
# Execute the query with the provided parameters
result = await conn.fetchrow(query, *params)
# Close the connection
await conn.close()
if result:
location_info = Location(
latitude=result['latitude'],
longitude=result['longitude'],
datetime=result.get('datetime'),
zip=result['zip'],
street=result.get('street', ''),
city=result['city'],
state=result['state'],
country=result['country'],
elevation=result.get('elevation', 0),
distance=result.get('distance')
)
DEBUG(f"Found location: {location_info}")
return location_info
else:
DEBUG("No location found with provided parameters.")
return None  # Location() would fail validation here, since latitude, longitude and datetime are required
except Exception as e:
ERR(f"Error occurred: {e}")
raise Exception("An error occurred while processing your request")
def find_override_locations(lat: float, lon: float) -> Optional[str]:
# Load the JSON file
with open(LOCATION_OVERRIDES, 'r') as file:
locations = json.load(file)
closest_location = None
closest_distance = float('inf')
# Iterate through each location entry in the JSON
for location in locations:
loc_name = location.get("name")
loc_lat = location.get("latitude")
loc_lon = location.get("longitude")
loc_radius = location.get("radius")
# Calculate distance using haversine
distance = haversine(lat, lon, loc_lat, loc_lon)
# Check if the distance is within the specified radius
if distance <= loc_radius:
if distance < closest_distance:
closest_distance = distance
closest_location = loc_name
return closest_location
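# The LOCATION_OVERRIDES file is assumed to be a JSON array of named circles;
# a minimal sketch (names and values invented for illustration):
#
#   [
#     {"name": "Home", "latitude": 45.5152, "longitude": -122.6784, "radius": 1.5},
#     {"name": "Office", "latitude": 45.5231, "longitude": -122.6765, "radius": 0.5}
#   ]
#
# `radius` must be in whatever units haversine() returns (assumed kilometers here).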
def get_elevation(latitude, longitude):
url = "https://api.open-elevation.com/api/v1/lookup"
payload = {
"locations": [
{
"latitude": latitude,
"longitude": longitude
}
]
}
try:
response = requests.post(url, json=payload)
response.raise_for_status() # Raise an exception for unsuccessful requests
data = response.json()
if "results" in data:
elevation = data["results"][0]["elevation"]
return elevation
else:
return None
except requests.exceptions.RequestException as e:
ERR(f"Error: {e}")
return None
async def fetch_locations(start: datetime, end: datetime = None) -> List[Location]:
start_datetime = localize_dt(start)
if end is None:
end_datetime = localize_dt(start_datetime.replace(hour=23, minute=59, second=59))
else:
end_datetime = localize_dt(end)
if start_datetime.time() == datetime.min.time() and end_datetime.time() == datetime.min.time():  # use end_datetime: end may be None here
end_datetime = end_datetime.replace(hour=23, minute=59, second=59)
DEBUG(f"Fetching locations between {start_datetime} and {end_datetime}")
conn = await get_db_connection()
locations = []
# Check for records within the specified datetime range
range_locations = await conn.fetch('''
SELECT id, datetime,
ST_X(ST_AsText(location)::geometry) AS longitude,
ST_Y(ST_AsText(location)::geometry) AS latitude,
ST_Z(ST_AsText(location)::geometry) AS elevation,
city, state, zip, street,
action, device_type, device_model, device_name, device_os
FROM locations
WHERE datetime >= $1 AND datetime <= $2
ORDER BY datetime DESC
''', start_datetime.replace(tzinfo=None), end_datetime.replace(tzinfo=None))
DEBUG(f"Range locations query returned: {range_locations}")
locations.extend(range_locations)
if not locations and (end is None or start_datetime.date() == end.date()):
location_data = await conn.fetchrow('''
SELECT id, datetime,
ST_X(ST_AsText(location)::geometry) AS longitude,
ST_Y(ST_AsText(location)::geometry) AS latitude,
ST_Z(ST_AsText(location)::geometry) AS elevation,
city, state, zip, street,
action, device_type, device_model, device_name, device_os
FROM locations
WHERE datetime < $1
ORDER BY datetime DESC
LIMIT 1
''', start_datetime.replace(tzinfo=None))
DEBUG(f"Fallback query returned: {location_data}")
if location_data:
locations.append(location_data)
await conn.close()
DEBUG(f"Locations found: {locations}")
# Sort location_data based on the datetime field in descending order
sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True)
# Create Location objects directly from the location data
location_objects = [Location(
latitude=loc['latitude'],
longitude=loc['longitude'],
datetime=loc['datetime'],
elevation=loc.get('elevation'),
city=loc.get('city'),
state=loc.get('state'),
zip=loc.get('zip'),
street=loc.get('street'),
context={
'action': loc.get('action'),
'device_type': loc.get('device_type'),
'device_model': loc.get('device_model'),
'device_name': loc.get('device_name'),
'device_os': loc.get('device_os')
}
) for loc in sorted_locations if loc['latitude'] is not None and loc['longitude'] is not None]
return location_objects if location_objects else []
# Function to fetch the last location before the specified datetime
async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
datetime = localize_dt(datetime)
DEBUG(f"Fetching last location before {datetime}")
conn = await get_db_connection()
location_data = await conn.fetchrow('''
SELECT id, datetime,
ST_X(ST_AsText(location)::geometry) AS longitude,
ST_Y(ST_AsText(location)::geometry) AS latitude,
ST_Z(ST_AsText(location)::geometry) AS elevation,
city, state, zip, street, country,
action
FROM locations
WHERE datetime < $1
ORDER BY datetime DESC
LIMIT 1
''', datetime.replace(tzinfo=None))
await conn.close()
if location_data:
DEBUG(f"Last location found: {location_data}")
return Location(**location_data)
else:
DEBUG("No location found before the specified datetime")
return None
@locate.get("/map/start_date={start_date_str}&end_date={end_date_str}", response_class=HTMLResponse)
async def generate_map_endpoint(start_date_str: str, end_date_str: str):
try:
start_date = localize_dt(start_date_str)
end_date = localize_dt(end_date_str)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid date format")
html_content = await generate_map(start_date, end_date)
return HTMLResponse(content=html_content)
@locate.get("/map", response_class=HTMLResponse)
async def generate_alltime_map_endpoint():
try:
start_date = localize_dt(datetime.fromisoformat("2022-01-01"))
end_date = localize_dt(datetime.now())
except ValueError:
raise HTTPException(status_code=400, detail="Invalid date format")
html_content = await generate_map(start_date, end_date)
return HTMLResponse(content=html_content)
async def generate_map(start_date: datetime, end_date: datetime):
locations = await fetch_locations(start_date, end_date)
if not locations:
raise HTTPException(status_code=404, detail="No locations found for the given date range")
# Create a folium map centered around the first location
map_center = [locations[0].latitude, locations[0].longitude]
m = folium.Map(location=map_center, zoom_start=5)
# Add markers for each location
for loc in locations:
folium.Marker(
location=[loc.latitude, loc.longitude],
popup=f"{loc.city}, {loc.state}<br>Elevation: {loc.elevation}m<br>Date: {loc.datetime}",
tooltip=f"{loc.city}, {loc.state}"
).add_to(m)
# Save the map to an HTML file and return the HTML content
map_html = "map.html"
m.save(map_html)
with open(map_html, 'r') as file:
html_content = file.read()
return html_content
async def post_location(location: Location):
DEBUG(f"post_location called with {location.datetime}")
conn = await get_db_connection()
try:
context = location.context or {}
action = context.get('action', 'manual')
device_type = context.get('device_type', 'Unknown')
device_model = context.get('device_model', 'Unknown')
device_name = context.get('device_name', 'Unknown')
device_os = context.get('device_os', 'Unknown')
# Parse and localize the datetime
localized_datetime = localize_dt(location.datetime)
await conn.execute('''
INSERT INTO locations (datetime, location, city, state, zip, street, action, device_type, device_model, device_name, device_os)
VALUES ($1, ST_SetSRID(ST_MakePoint($2, $3, $4), 4326), $5, $6, $7, $8, $9, $10, $11, $12, $13)
''', localized_datetime, location.longitude, location.latitude, location.elevation, location.city, location.state, location.zip, location.street, action, device_type, device_model, device_name, device_os)
await conn.close()
INFO(f"Successfully posted location: {location.latitude}, {location.longitude} on {localized_datetime}")
return {
'datetime': localized_datetime,
'latitude': location.latitude,
'longitude': location.longitude,
'city': location.city,
'state': location.state,
'zip': location.zip,
'street': location.street,
'elevation': location.elevation,
'action': action,
'device_type': device_type,
'device_model': device_model,
'device_name': device_name,
'device_os': device_os
}
except Exception as e:
ERR(f"Error posting location {e}")
ERR(traceback.format_exc())
return None
@locate.post("/locate")
async def post_locate_endpoint(locations: Union[Location, List[Location]]):
responses = []
if isinstance(locations, Location):
locations = [locations]
for location in locations:
if not location.datetime:
current_time = datetime.now(timezone.utc)
location.datetime = current_time.isoformat()
if not location.elevation:
location.elevation = location.altitude if location.altitude else get_elevation(location.latitude, location.longitude)
# Ensure context is a dictionary with default values if not provided
if not location.context:
location.context = {
"action": "manual",
"device_type": "Pythonista",
"device_model": "Unknown",
"device_name": "Unknown",
"device_os": "Unknown"
}
DEBUG(f"datetime before localization: {location.datetime}")
# Convert datetime string to timezone-aware datetime object
location.datetime = localize_dt(location.datetime)
DEBUG(f"datetime after localization: {location.datetime}")
location_entry = await post_location(location)
if location_entry:
responses.append({"location_data": location_entry}) # Add weather data if necessary
await asyncio.sleep(0.1) # Use asyncio.sleep for async compatibility
return {"message": "Locations and weather updated", "results": responses}
# GET endpoint to fetch the last location before the specified datetime
# @locate.get("/last_location", response_model=Union[Location, Dict[str, str]])
@locate.get("/locate", response_model=List[Location])
async def get_last_location() -> JSONResponse:
query_datetime = datetime.now(TZ)
DEBUG(f"Query_datetime: {query_datetime}")
location = await fetch_last_location_before(query_datetime)
if location:
DEBUG(f"location: {location}")
location_dict = location.model_dump() # use model_dump instead of dict
location_dict["datetime"] = location.datetime.isoformat()
return JSONResponse(content=location_dict)
else:
return JSONResponse(content={"message": "No location found before the specified datetime"}, status_code=404)
@locate.get("/locate/{datetime_str}", response_model=List[Location])
async def get_locate(datetime_str: str, all: bool = False):
try:
date_time = localize_dt(datetime_str)
except ValueError as e:
ERR(f"Invalid datetime string provided: {datetime_str}")
return ["ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."]
locations = await fetch_locations(date_time)
if not locations:
raise HTTPException(status_code=404, detail="No nearby data found for this date and time")
return locations if all else [locations[0]]
future_elevation = """
def get_elevation_srtm(latitude, longitude, srtm_file):
try:
# Open the SRTM dataset
dataset = gdal.Open(srtm_file)
# Get the geotransform and band information
geotransform = dataset.GetGeoTransform()
band = dataset.GetRasterBand(1)
# Calculate the pixel coordinates from the latitude and longitude
x = int((longitude - geotransform[0]) / geotransform[1])
y = int((latitude - geotransform[3]) / geotransform[5])
# Read the elevation value from the SRTM dataset
elevation = band.ReadAsArray(x, y, 1, 1)[0][0]
# Close the dataset
dataset = None
return elevation
except Exception as e:
ERR(f"Error: {e}")
return None
"""
def get_elevation2(latitude: float, longitude: float) -> float:
url = f"https://nationalmap.gov/epqs/pqs.php?x={longitude}&y={latitude}&units=Meters&output=json"
try:
response = requests.get(url)
data = response.json()
elevation = data["USGS_Elevation_Point_Query_Service"]["Elevation_Query"]["Elevation"]
return float(elevation)
except Exception as e:
# Handle exceptions (e.g., network errors, API changes) appropriately
raise RuntimeError(f"Error getting elevation data: {str(e)}")

1097
sijapi/routers/note.py Normal file

File diff suppressed because it is too large

16
sijapi/routers/rag.py Normal file
View file

@ -0,0 +1,16 @@
'''
IN DEVELOPMENT: Retrieval-Augmented Generation module.
NOTES: Haven't yet decided if this should depend on the Obsidian and Chat modules, if they should depend on it, or some mix of the two.
'''
from fastapi import APIRouter
rag = APIRouter()
rag.get("/rag/search")
async def rag_search_endpoint(query: str, scope: str):
pass
rag.post("/rag/embed")
async def rag_upload_endpoint(path: str):
pass

440
sijapi/routers/sd.py Normal file
View file

@ -0,0 +1,440 @@
'''
Image generation module using StableDiffusion and similar models by way of ComfyUI.
DEPENDS ON:
LLM module
COMFYUI_URL, COMFYUI_DIR, COMFYUI_OUTPUT_DIR, HOST_PORT, TS_SUBNET, TS_ADDRESS, DATA_DIR, CONFIG_DIR, SD_IMAGE_DIR, SD_WORKFLOWS_DIR, LOCAL_HOSTS, BASE_URL, PHOTOPRISM_USER*, PHOTOPRISM_URL*, PHOTOPRISM_PASS*
*unimplemented.
'''
from fastapi import APIRouter, Request, Response
from starlette.datastructures import Address
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from aiohttp import ClientSession, ClientTimeout
import aiofiles
from PIL import Image
from pathlib import Path
import uuid
import json
import ipaddress
import socket
import subprocess
import os, re
import random
from io import BytesIO
import base64
import asyncio
import shutil
# from photoprism.Session import Session
# from photoprism.Photo import Photo
# from webdav3.client import Client
from sijapi.routers.llm import query_ollama
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import COMFYUI_URL, COMFYUI_LAUNCH_CMD, COMFYUI_DIR, COMFYUI_OUTPUT_DIR, HOST_PORT, TS_SUBNET, CONFIG_DIR, SD_IMAGE_DIR, SD_WORKFLOWS_DIR, LOCAL_HOSTS, BASE_URL
sd = APIRouter()
uri_path = "img"
CONFIG_PATH = CONFIG_DIR / 'sd.json'
with open(CONFIG_PATH, 'r') as config_file:
CONFIG = json.load(config_file)
CLIENT_ID = str(uuid.uuid4())
@sd.post("/sd")
@sd.post("/v1/images/generations")
async def sd_endpoint(request: Request):
request_data = await request.json()
prompt = request_data.get("prompt")
model = request_data.get("model")
size = request_data.get("size")
style = request_data.get("style") or "photorealistic"
earlyurl = request_data.get("earlyurl", False)
filename = await workflow(prompt=prompt, scene=model, size=size, style=style, earlyurl=earlyurl)
client_ip = ipaddress.ip_address(request.client.host)
if client_ip in LOCAL_HOSTS:
url = f"http://localhost:{HOST_PORT}/{uri_path}/{filename}"
elif client_ip in TS_SUBNET:
# url = f"{TS_ADDRESS}:{HOST_PORT}/{uri}"
url = f"{BASE_URL}/{uri_path}/{filename}"
else:
url = f"We are watching you {request.client.host}"
return JSONResponse({"image_url": url})
async def workflow(prompt: str, scene: str = None, size: str = None, style: str = "photorealistic", earlyurl: bool = False, destination_path: str = None):
scene_data = get_scene(scene)
if not scene_data:
scene_data = get_matching_scene(prompt)
prompt = scene_data['LLM_PrePrompt'] + prompt # LLM_PrePrompt serves as a preface to the prompt
image_concept = await query_ollama(usr=prompt, sys=scene_data['LLM_SysMsg'], max_tokens=100)
DEBUG(f"sd_endpoint - image_concept: {image_concept}")
scene_workflow = random.choice(scene_data['workflows'])
size = size if size else scene_workflow.get('size', '1024x1024')
try:
width, height = map(int, size.split('x'))
except ValueError:
return JSONResponse({"error": "Invalid size format. Please use 'widthxheight'."})
workflow_path = Path(SD_WORKFLOWS_DIR) / scene_workflow['workflow']
workflow_data = json.loads(workflow_path.read_text())
post = {
"API_PPrompt": scene_data['API_PPrompt'] + image_concept + "; ((" + scene_data['triggers'][0] + ")) ",
"API_SPrompt": scene_data['API_SPrompt'],
"API_NPrompt": scene_data['API_NPrompt'],
"width": width,
"height": height
}
saved_file_key = update_prompt(workflow=workflow_data, post=post)
DEBUG(f"Saved file key: {saved_file_key}")
preset_values = scene_workflow.get('preset_values')
set_presets(workflow_data, preset_values)
DEBUG(f"Workflow_data: {workflow_data}")
prompt_id = await queue_prompt(workflow_data)
if destination_path is None:
destination_path = SD_IMAGE_DIR / f"{prompt_id}"
destination_path = Path(destination_path).with_suffix(".jpg")
max_size = max(width, height)
if earlyurl:
asyncio.create_task(poll_status_and_save(prompt_id, destination_path, max_size, 90))
return get_return_path(destination_path)
else:
local_path = await poll_status_and_save(prompt_id, destination_path, max_size, 90)
await asyncio.sleep(1)
return get_return_path(destination_path)
def set_presets(workflow_data, preset_values):
if preset_values:
preset_node = preset_values.get('node')
preset_key = preset_values.get('key')
values = preset_values.get('values')
if preset_node and preset_key and values:
preset_value = random.choice(values)
if 'inputs' in workflow_data.get(preset_node, {}):
workflow_data[preset_node]['inputs'][preset_key] = preset_value
else:
DEBUG("Node not found in workflow_data")
else:
DEBUG("Required data missing in preset_values")
else:
DEBUG("No preset_values found")
def get_return_path(destination_path):
sd_dir = Path(SD_IMAGE_DIR)
if destination_path.parent.samefile(sd_dir):
return destination_path.name
else:
return str(destination_path)
# This allows selecting a scene by name
def get_scene(scene):
for scene_data in CONFIG['scenes']:
if scene_data['scene'] == scene:
return scene_data
return None
# This returns the scene whose trigger words appear most often in the provided prompt; if none match, it falls back to the first scene in the array, which should therefore be a sensible default.
def get_matching_scene(prompt):
prompt_lower = prompt.lower()
max_count = 0
scene_data = None
for sc in CONFIG['scenes']:
count = sum(1 for trigger in sc['triggers'] if trigger in prompt_lower)
if count > max_count:
max_count = count
scene_data = sc
return scene_data if scene_data else CONFIG['scenes'][0] # fall back on first scene, which should be an appropriate default scene.
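# A minimal sketch of the expected sd.json shape (keys mirror those read above;
# all values are invented for illustration):
#
#   {
#     "scenes": [
#       {
#         "scene": "default",
#         "triggers": ["photo", "portrait"],
#         "API_PPrompt": "high quality photograph of ",
#         "API_SPrompt": ", sharp focus, natural light",
#         "API_NPrompt": "blurry, watermark, text",
#         "LLM_SysMsg": "You expand terse image prompts into vivid scenes.",
#         "LLM_PrePrompt": "Expand this into a single vivid scene: ",
#         "workflows": [
#           {"workflow": "default.json", "size": "1024x1024",
#            "preset_values": {"node": "4", "key": "ckpt_name",
#                              "values": ["model_a.safetensors"]}}
#         ]
#       }
#     ]
#   }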
async def poll_status_and_save(prompt_id, destination_path: Path, max_size=1440, quality=90):
try:
status_data = await poll_status(prompt_id)
save_image_key = None
for key, value in status_data.get("outputs", {}).items():
if "images" in value:
save_image_key = key
break
if save_image_key:
image_data = await get_image(status_data, save_image_key)
new_path = await save_as_jpg(image_data, prompt_id, max_size, quality, destination_path)
if new_path and destination_path and str(destination_path) != new_path:  # save_as_jpg returns a str; compare like types
WARN(f"Unusual behavior in poll_status_and_save and save_as_jpg. Destination path: {destination_path}, new_path returned by save_as_jpg: {new_path}")
shutil.move(new_path, destination_path)
return str(destination_path)  # after the move, new_path no longer exists
return new_path
except Exception as e:
raise Exception(f"Error in poll_status_and_save: {e}")
async def ensure_comfy():
try:
with socket.create_connection(("127.0.0.1", 8188), timeout=2):
print("ComfyUI is already running.")
return
except (socket.timeout, ConnectionRefusedError):
print("ComfyUI is not running. Starting it now...")
try:
tmux_command = (
"tmux split-window -h "
"\"source /Users/sij/.zshrc; cd /Users/sij/workshop/ComfyUI; "
"mamba activate comfyui && "
"python main.py; exec $SHELL\""
)
subprocess.Popen(tmux_command, shell=True)
print("ComfyUI started in a new tmux session.")
# Wait 15 seconds for ComfyUI to come up
await asyncio.sleep(15)
print("Waited 15 seconds after starting ComfyUI.")
except Exception as e:
print(f"Error starting ComfyUI: {e}")
async def poll_status(prompt_id):
"""Asynchronously poll the job status until it's complete and return the status data."""
start_time = asyncio.get_event_loop().time() # Use asyncio's loop
await ensure_comfy()
async with ClientSession() as session:
while True:
elapsed_time = int(asyncio.get_event_loop().time() - start_time) # Calculate elapsed time in seconds
response = await session.get(f"{COMFYUI_URL}/history/{prompt_id}")
DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds")
if response.status != 200:
raise Exception("Failed to get job status")
status_data = await response.json()
job_data = status_data.get(prompt_id, {})
if job_data.get("status", {}).get("completed", False):
DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.")
return job_data
await asyncio.sleep(10)
async def get_image(status_data, key):
"""Asynchronously extract the filename and subfolder from the status data and read the file."""
try:
outputs = status_data.get("outputs", {})
images_info = outputs.get(key, {}).get("images", [])
if not images_info:
raise Exception("No images found in the job output.")
image_info = images_info[0] # Assuming the first image is the target
filename = image_info.get("filename")
subfolder = image_info.get("subfolder", "") # Default to empty if not present
file_path = os.path.join(COMFYUI_OUTPUT_DIR, subfolder, filename)
async with aiofiles.open(file_path, 'rb') as file:
return await file.read()
except KeyError as e:
raise Exception(f"Failed to extract image information due to missing key: {e}")
except FileNotFoundError:
raise Exception(f"File {filename} not found at the expected path {file_path}")
async def save_as_jpg(image_data, prompt_id, max_size=2160, quality=80, destination_path: Path = None, keep_original: bool = True):
destination_path_png = (SD_IMAGE_DIR / prompt_id).with_suffix(".png")
destination_path_jpg = destination_path.with_suffix(".jpg") if destination_path else (SD_IMAGE_DIR / prompt_id).with_suffix(".jpg")
DEBUG(f"Constructed jpg destination: {destination_path_jpg}")
try:
# Create the output directory if it does not exist (destination_path may be None, so use the jpg path)
destination_path_jpg.parent.mkdir(parents=True, exist_ok=True)
await asyncio.sleep(1)
# Save the image as .png
async with aiofiles.open(destination_path_png, 'wb') as file:
await file.write(image_data)
with Image.open(destination_path_png) as img:
# Resize image if needed
if max(img.size) > max_size:
ratio = max_size / max(img.size)
new_size = tuple([int(x * ratio) for x in img.size])
img = img.resize(new_size, Image.Resampling.LANCZOS)
# Convert and save image as .jpg
img.convert('RGB').save(destination_path_jpg, format='JPEG', quality=quality)
if not keep_original:
os.remove(destination_path_png)
return str(destination_path_jpg)
except Exception as e:
ERR(f"Error processing image: {e}")
return None
# async def upload_and_get_shareable_link(image_path):
# try:
# Set up the PhotoPrism session
# pp_session = Session(PHOTOPRISM_USER, PHOTOPRISM_PASS, PHOTOPRISM_URL, use_https=True)
# pp_session.create()
# Start import
# photo = Photo(pp_session)
# photo.start_import(path=os.path.dirname(image_path))
# Give PhotoPrism some time to process the upload
# await asyncio.sleep(5)
# Search for the uploaded photo
# photo_name = os.path.basename(image_path)
# search_results = photo.search(query=f"name:{photo_name}", count=1)
# if search_results['photos']:
# photo_uuid = search_results['photos'][0]['uuid']
# shareable_link = f"https://{PHOTOPRISM_URL}/p/{photo_uuid}"
# return shareable_link
# else:
# ERR("Could not find the uploaded photo details.")
# return None
# except Exception as e:
# ERR(f"Error in upload_and_get_shareable_link: {e}")
# return None
@sd.get("/image/{prompt_id}")
async def get_image_status(prompt_id: str):
status_data = await poll_status(prompt_id)
save_image_key = None
for key, value in status_data.get("outputs", {}).items():
if "images" in value:
save_image_key = key
break
if save_image_key:
image_data = await get_image(status_data, save_image_key)
await save_as_jpg(image_data, prompt_id)
external_url = f"https://api.lone.blue/img/{prompt_id}.jpg"
return JSONResponse({"image_url": external_url})
else:
return JSONResponse(content={"status": "Processing", "details": status_data}, status_code=202)
@sd.get("/image-status/{prompt_id}")
async def get_image_processing_status(prompt_id: str):
try:
status_data = await poll_status(prompt_id)
return JSONResponse(content={"status": "Processing", "details": status_data}, status_code=200)
except Exception as e:
return JSONResponse(content={"error": str(e)}, status_code=500)
@sd.options("/v1/images/generations", tags=["generations"])
async def get_generation_options():
return {
"model": {
"description": "The model to use for image generation.",
"type": "string",
"example": "stable-diffusion"
},
"prompt": {
"description": "The text prompt for the image generation.",
"type": "string",
"required": True,
"example": "A beautiful sunset over the ocean."
},
"n": {
"description": "The number of images to generate.",
"type": "integer",
"default": 1,
"example": 3
},
"size": {
"description": "The size of the generated images in 'widthxheight' format.",
"type": "string",
"default": "1024x1024",
"example": "512x512"
},
"style": {
"description": "The style for the generated images.",
"type": "string",
"default": "photorealistic",
"example": "cartoon"
},
"raw": {
"description": "Whether to return raw image data or not.",
"type": "boolean",
"default": False
},
"earlyurl": {
"description": "Whether to return the URL early or wait for the image to be ready.",
"type": "boolean",
"default": False
}
}
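# Example request matching the options above (illustrative; assumes a deployed
# instance and that `requests` is available client-side):
#
#   import requests
#   r = requests.post(f"{BASE_URL}/v1/images/generations", json={
#       "prompt": "A beautiful sunset over the ocean.",
#       "size": "1024x1024", "style": "photorealistic", "earlyurl": True,
#   })
#   print(r.json()["image_url"])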
async def load_workflow(workflow_path: str, workflow: str):
workflow_path = workflow_path if workflow_path else os.path.join(SD_WORKFLOWS_DIR, f"{workflow}.json" if not workflow.endswith('.json') else workflow)
with open(workflow_path, 'r') as file:
return json.load(file)
def update_prompt(workflow: dict, post: dict, found_key=[None], path=None):
if path is None:
path = []
try:
if isinstance(workflow, dict):
for key, value in workflow.items():
current_path = path + [key]
if isinstance(value, dict):
if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
found_key[0] = key
update_prompt(value, post, found_key=found_key, path=current_path)
elif isinstance(value, list):
# Recursive call with updated path for each item in a list
for index, item in enumerate(value):
update_prompt(item, post, found_key, current_path + [str(index)])
if value == "API_PPrompt":
workflow[key] = post.get(value, "")
DEBUG(f"Updated API_PPrompt to: {workflow[key]}")
elif value == "API_SPrompt":
workflow[key] = post.get(value, "")
DEBUG(f"Updated API_SPrompt to: {workflow[key]}")
elif value == "API_NPrompt":
workflow[key] = post.get(value, "")
DEBUG(f"Updated API_NPrompt to: {workflow[key]}")
elif key == "seed" or key == "noise_seed":
workflow[key] = random.randint(1000000000000, 9999999999999)
DEBUG(f"Updated seed to: {workflow[key]}")
elif key in ["width", "max_width", "scaled_width", "side_length"]:
workflow[key] = post["width"]
elif key in ["height", "max_height", "scaled_height"]:
workflow[key] = post["height"]
except Exception as e:
ERR(f"Error in update_prompt at path {' -> '.join(path)}: {e}")
raise
return found_key[0]
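# update_prompt() walks a ComfyUI workflow dict, swapping the literal placeholder
# strings "API_PPrompt" / "API_SPrompt" / "API_NPrompt" for the generated prompts,
# randomizing "seed"/"noise_seed", and setting width/height keys. A minimal sketch
# of nodes it would rewrite (node ids and fields invented for illustration):
#
#   {
#     "6": {"class_type": "CLIPTextEncode",
#           "inputs": {"text": "API_PPrompt", "clip": ["4", 1]}},
#     "3": {"class_type": "KSampler",
#           "inputs": {"seed": 0, "steps": 30, "denoise": 1.0}}
#   }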
async def queue_prompt(prompt: dict):
DEBUG(f"Dict to queue: {prompt}")
async with ClientSession() as session:
await ensure_comfy()
response = await session.post(f"{COMFYUI_URL}/prompt", json={"prompt": prompt, "client_id": CLIENT_ID})
if response.status == 200:
json_data = await response.json() # Properly await the JSON parsing
return json_data.get('prompt_id') # Then access the 'prompt_id'
else:
error_message = f"Failed to queue prompt. Status code: {response.status}, Response body: {await response.text()}"
ERR(error_message)
raise Exception(error_message)

30
sijapi/routers/serve.py Normal file
View file

@ -0,0 +1,30 @@
'''
Web server module. Used by other modules when serving static content is required, e.g. the sd image generation module. Also used to serve PUBLIC_KEY.
'''
import os
from fastapi import APIRouter, Form, HTTPException, Request, Response
from fastapi.responses import FileResponse
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pathlib import Path
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi.utilities import bool_convert, sanitize_filename
from sijapi import DATA_DIR, SD_IMAGE_DIR, PUBLIC_KEY
serve = APIRouter(tags=["public"])
@serve.get("/pgp")
async def get_pgp():
return Response(PUBLIC_KEY, media_type="text/plain")
@serve.get("/img/{image_name}")
def serve_image(image_name: str):
image_path = os.path.join(SD_IMAGE_DIR, image_name)
if os.path.exists(image_path):
return FileResponse(image_path)
else:
return {"error": "Image not found"}

211
sijapi/routers/summarize.py Normal file
View file

@ -0,0 +1,211 @@
from fastapi import APIRouter, BackgroundTasks, File, Form, HTTPException, UploadFile
from fastapi.responses import FileResponse
from pathlib import Path
import tempfile
import filetype
import shutil
import os
import re
from os.path import basename, splitext
from datetime import datetime
from typing import Optional, Union, List
from PyPDF2 import PdfReader
from pdfminer.high_level import extract_text as pdfminer_extract_text
import pytesseract
from pdf2image import convert_from_path
import asyncio
import html2text
import markdown
from ollama import Client, AsyncClient
from docx import Document
from sijapi.routers.tts import generate_speech
from sijapi.routers.asr import transcribe_audio
from sijapi.utilities import sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension, f
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import DEFAULT_VOICE, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
summarize = APIRouter(tags=["trusted", "private"])
@summarize.get("/summarize")
async def summarize_get(text: str = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)):
summarized_text = await summarize_text(text, instruction)
return summarized_text
@summarize.post("/summarize")
async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)):
text_content = text if text else await extract_text(file)
summarized_text = await summarize_text(text_content, instruction)
return summarized_text
@summarize.post("/speaksummary")
async def summarize_tts_endpoint(background_tasks: BackgroundTasks, instruction: str = Form(SUMMARY_INSTRUCT), file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), voice: Optional[str] = Form(DEFAULT_VOICE), speed: Optional[float] = Form(1.2), podcast: Union[bool, str] = Form(False)):
podcast = str_to_bool(str(podcast)) # Proper boolean conversion
text_content = text if text else await extract_text(file)  # extract_text is async and must be awaited
final_output_path = await summarize_tts(text_content, instruction, voice, speed, podcast)
return FileResponse(path=final_output_path, filename=os.path.basename(final_output_path), media_type='audio/wav')
async def summarize_tts(
text: str,
instruction: str = SUMMARY_INSTRUCT,
voice: Optional[str] = DEFAULT_VOICE,
speed: float = 1.1,
podcast: bool = False,
LLM: AsyncClient = None
):
LLM = LLM if LLM else AsyncClient()
summarized_text = await summarize_text(text, instruction, LLM=LLM)
filename = await summarize_text(summarized_text, "Provide a title for this summary no longer than 4 words")
filename = sanitize_filename(filename)
filename = ' '.join(filename.split()[:5])
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"{timestamp}{filename}.wav"
background_tasks = BackgroundTasks()
final_output_path = await generate_speech(background_tasks, summarized_text, voice, "xtts", speed=speed, podcast=podcast, title=filename)
DEBUG(f"summary_tts completed with final_output_path: {final_output_path}")
return final_output_path
async def get_title(text: str, LLM: AsyncClient = None):  # annotate with the class, not an instance
LLM = LLM if LLM else AsyncClient()
title = await process_chunk("Generate a title for this text", text, 1, 1, 12, LLM)
title = sanitize_filename(title)
return title
def split_text_into_chunks(text: str) -> List[str]:
"""
Splits the given text into manageable chunks based on predefined size and overlap.
"""
words = text.split()
adjusted_chunk_size = max(1, int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW)) # Ensure at least 1
adjusted_overlap = max(0, int(SUMMARY_CHUNK_OVERLAP / SUMMARY_TPW)) # Ensure non-negative
chunks = []
for i in range(0, len(words), adjusted_chunk_size - adjusted_overlap):
DEBUG(f"We are on iteration # {i} if split_text_into_chunks.")
chunk = ' '.join(words[i:i + adjusted_chunk_size])
chunks.append(chunk)
return chunks
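# Worked example (constants invented): with SUMMARY_CHUNK_SIZE=1000,
# SUMMARY_CHUNK_OVERLAP=100 and SUMMARY_TPW=1.3, adjusted_chunk_size is
# int(1000/1.3)=769 words and adjusted_overlap is int(100/1.3)=76, so the loop
# advances 693 words per iteration and consecutive chunks share 76 words.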
def calculate_max_tokens(text: str) -> int:
tokens_count = max(1, int(len(text.split()) * SUMMARY_TPW)) # Ensure at least 1
return min(tokens_count // 4, SUMMARY_CHUNK_SIZE)
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], background_tasks: BackgroundTasks = None) -> str:
if isinstance(file, UploadFile):
file_extension = get_extension(file)
temp_file_path = tempfile.mktemp(suffix=file_extension)
with open(temp_file_path, 'wb') as buffer:
shutil.copyfileobj(file.file, buffer)
file_path = temp_file_path
elif isinstance(file, (bytes, bytearray)):
temp_file_path = tempfile.mktemp()
with open(temp_file_path, 'wb') as buffer:
buffer.write(file)
file_path = temp_file_path
elif isinstance(file, (str, Path)):
file_path = str(file)
else:
raise ValueError("Unsupported file type")
_, file_ext = os.path.splitext(file_path)
file_ext = file_ext.lower()
text_content = ""
if file_ext == '.pdf':
text_content = await extract_text_from_pdf(file_path)
elif file_ext in ['.wav', '.m4a', '.m4v', '.mp3', '.mp4']:
text_content = await transcribe_audio(file_path=file_path)
elif file_ext == '.md':
text_content = await read_text_file(file_path)
text_content = markdown.markdown(text_content)
elif file_ext == '.html':
text_content = await read_text_file(file_path)
text_content = html2text.html2text(text_content)
elif file_ext in ['.txt', '.csv', '.json']:
text_content = await read_text_file(file_path)
elif file_ext == '.docx':
text_content = await extract_text_from_docx(file_path)
if background_tasks and 'temp_file_path' in locals():
background_tasks.add_task(os.remove, temp_file_path)
elif 'temp_file_path' in locals():
os.remove(temp_file_path)
return text_content
async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_override: int = None, length_quotient: float = SUMMARY_LENGTH_RATIO, LLM: AsyncClient = None):
"""
Process the given text: split into chunks, summarize each chunk, and
potentially summarize the concatenated summary for long texts.
"""
LLM = LLM if LLM else AsyncClient()
chunked_text = split_text_into_chunks(text)
total_parts = max(1, len(chunked_text)) # Ensure at least 1
total_words_count = len(text.split())
total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW)) # Ensure at least 1
total_summary_length = length_override if length_override else total_tokens_count // length_quotient
corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
individual_summary_length = max(1, corrected_total_summary_length // total_parts) # Ensure at least 1
DEBUG(f"Text split into {total_parts} chunks.")
summaries = await asyncio.gather(*[
process_chunk(instruction, chunk, i+1, total_parts, individual_summary_length, LLM) for i, chunk in enumerate(chunked_text)
])
concatenated_summary = ' '.join(summaries)
if total_parts > 1:
concatenated_summary = await process_chunk(instruction, concatenated_summary, 1, 1)
return concatenated_summary
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, max_tokens: Optional[int] = None, LLM: AsyncClient = None) -> str:
"""
Process a portion of text using the ollama library asynchronously.
"""
LLM = LLM if LLM else AsyncClient()
words_count = max(1, len(text.split())) # Ensure at least 1
tokens_count = max(1, int(words_count * SUMMARY_TPW)) # Ensure at least 1
fraction_tokens = max(1, tokens_count // SUMMARY_LENGTH_RATIO) # Ensure at least 1
if max_tokens is None:
max_tokens = min(fraction_tokens, SUMMARY_CHUNK_SIZE // max(1, total_parts)) # Ensure at least 1
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH) # Ensure a minimum token count to avoid tiny processing chunks
DEBUG(f"Summarizing part {part} of {total_parts}: Max_tokens: {max_tokens}")
if part and total_parts > 1:
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
else:
prompt = f"{instruction}:\n\n{text}"
DEBUG(f"Starting LLM.generate for part {part} of {total_parts}")
response = await LLM.generate(
model=SUMMARY_MODEL,
prompt=prompt,
stream=False,
options={'num_predict': max_tokens, 'temperature': 0.6}
)
text_response = response['response']
DEBUG(f"Completed LLM.generate for part {part} of {total_parts}")
return text_response
async def title_and_summary(extracted_text: str):
title = await get_title(extracted_text)
processed_title = title.split("\n")[-1]
processed_title = processed_title.split("\r")[-1]
processed_title = sanitize_filename(processed_title)
summary = await summarize_text(extracted_text)
return processed_title, summary

577
sijapi/routers/time.py Normal file
View file

@ -0,0 +1,577 @@
import tempfile
import os
import json
import requests
import csv
import subprocess
import asyncio
import httpx
import io
import re
import pytz
import sqlite3
import math
from httpx import Timeout
from fastapi import APIRouter, UploadFile, File, Response, Header, Query, Depends, FastAPI, Request, HTTPException, status
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel, Field
from datetime import datetime, timedelta
from sijapi.utilities import localize_dt
from decimal import Decimal, ROUND_UP
from typing import Optional, List, Dict, Union, Tuple
from collections import defaultdict
from dotenv import load_dotenv
from traceback import format_exc
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOME_DIR, TIMING_API_KEY, TIMING_API_URL
########################
#### INITIALIZATION ####
########################
time = APIRouter(tags=["private"])
script_directory = os.path.dirname(os.path.abspath(__file__))
# Configuration constants
pacific = pytz.timezone('America/Los_Angeles')
emoji_pattern = re.compile(r'^[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F700-\U0001F77F\U0001F780-\U0001F7FF\U0001F800-\U0001F8FF\U0001F900-\U0001F9FF\U0001FA00-\U0001FA6F\U0001FA70-\U0001FAFF\U00002702-\U000027B0\U000024C2-\U0001F251]+ ')
timeout = Timeout(connect=30, read=600, write=120, pool=5)
# Define your models
class TimingRequest(BaseModel):
start_date: str = Field(..., pattern=r"\d{4}-\d{2}-\d{2}")
end_date: Optional[str] = Field(None, pattern=r"\d{4}-\d{2}-\d{2}")
output_format: Optional[str] = 'json'
####################
#### TIMING API ####
####################
@time.post("/time/post")
async def post_time_entry_to_timing(entry: Dict):
url = 'https://web.timingapp.com/api/v1/time-entries'
headers = {
'Authorization': f'Bearer {TIMING_API_KEY}',
'Content-Type': 'application/json',
'Accept': 'application/json',
'X-Time-Zone': 'America/Los_Angeles'
}
DEBUG(f"Received entry: {entry}")
response = None # Initialize response
try:
async with httpx.AsyncClient() as client:
response = await client.post(url, headers=headers, json=entry)
response.raise_for_status() # This will only raise for 4xx and 5xx responses
except httpx.HTTPStatusError as exc:
DEBUG(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text))
except Exception as exc:
DEBUG(f"General exception caught: {exc}")
raise HTTPException(status_code=500, detail="An unexpected error occurred")
if response:
return response.json()
else:
# Handle the case where the response was not set due to an error.
raise HTTPException(status_code=500, detail="Failed to make the external API request")
def project_sort_key(project):
# Remove any leading emoji characters for sorting
return emoji_pattern.sub('', project)
def prepare_date_range_for_query(start_date, end_date=None):
# Adjust the start date to include the day before
start_date_adjusted = (datetime.strptime(start_date, "%Y-%m-%d") - timedelta(days=1)).strftime("%Y-%m-%d")
# If end_date is not provided, use the original start_date as the end_date
end_date = end_date if end_date else start_date
# Format the end_date
end_date_formatted = f"{end_date}T23:59:59"
return f"{start_date_adjusted}T00:00:00", end_date_formatted
def truncate_project_title(title):
return title.split(' - ')[0] if ' - ' in title else title
async def fetch_and_prepare_timing_data(start: datetime, end: Optional[datetime] = None) -> List[Dict]:
# start_date = localize_dt(start)
# end_date = localize_dt(end) if end else None
# Adjust the start date to include the day before and format the end date
start_date_adjusted = (start - timedelta(days=1)).strftime("%Y-%m-%dT00:00:00")
end_date_formatted = f"{datetime.strftime(end, '%Y-%m-%d')}T23:59:59" if end else f"{datetime.strftime(start, '%Y-%m-%d')}T23:59:59"
# Fetch timing data from the API using TIMING_API_KEY
url = f"{TIMING_API_URL}/time-entries?start_date_min={start_date_adjusted}&start_date_max={end_date_formatted}&include_project_data=1"
headers = {
'Authorization': f'Bearer {TIMING_API_KEY}',
'Content-Type': 'application/json',
'Accept': 'application/json',
'X-Time-Zone': 'America/Los_Angeles'
}
processed_timing_data = []
async with httpx.AsyncClient() as client:
response = await client.get(url, headers=headers)
if response.status_code != 200:
response.raise_for_status()
raw_timing_data = response.json().get('data', [])
for entry in raw_timing_data:
entry_start_utc = datetime.strptime(entry['start_date'], '%Y-%m-%dT%H:%M:%S.%f%z')
entry_end_utc = datetime.strptime(entry['end_date'], '%Y-%m-%dT%H:%M:%S.%f%z')
entry_start_pacific = entry_start_utc.astimezone(pacific)
entry_end_pacific = entry_end_utc.astimezone(pacific)
while entry_start_pacific.date() < entry_end_pacific.date():
midnight = pacific.localize(datetime.combine(entry_start_pacific.date() + timedelta(days=1), datetime.min.time()))
duration_to_midnight = (midnight - entry_start_pacific).total_seconds()
if entry_start_pacific.date() >= start.date():
processed_entry = create_time_entry(entry, entry_start_pacific, midnight, duration_to_midnight)
processed_timing_data.append(processed_entry)
entry_start_pacific = midnight
if entry_start_pacific.date() >= start.date():
duration_remaining = (entry_end_pacific - entry_start_pacific).total_seconds()
processed_entry = create_time_entry(entry, entry_start_pacific, entry_end_pacific, duration_remaining)
processed_timing_data.append(processed_entry)
return processed_timing_data
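# Worked example of the midnight split above (times invented): an entry from
# 2024-06-21 22:00 to 2024-06-22 01:00 Pacific is emitted as two entries,
# 22:00-00:00 (7200 s) dated the 21st and 00:00-01:00 (3600 s) dated the 22nd;
# the first is dropped if the 21st falls before the queried start date.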
def format_duration(duration):
duration_in_hours = Decimal(duration) / Decimal(3600)
rounded_duration = duration_in_hours.quantize(Decimal('0.1'), rounding=ROUND_UP)
return str(rounded_duration)
def create_time_entry(original_entry, start_time, end_time, duration_seconds):
"""Formats a time entry, preserving key details and adding necessary elements."""
# Format start and end times in the appropriate timezone
start_time_aware = start_time.astimezone(pacific)
end_time_aware = end_time.astimezone(pacific)
# Check if project is None and handle accordingly
if original_entry.get('project'):
project_title = original_entry['project'].get('title', 'No Project')
project_color = original_entry['project'].get('color', '#FFFFFF') # Default color
else:
project_title = 'No Project'
project_color = '#FFFFFF' # Default color
# Construct the processed entry
processed_entry = {
'start_time': start_time_aware.strftime('%Y-%m-%dT%H:%M:%S.%f%z'),
'end_time': end_time_aware.strftime('%Y-%m-%dT%H:%M:%S.%f%z'),
'start_date': start_time_aware.strftime('%Y-%m-%d'),
'end_date': end_time_aware.strftime('%Y-%m-%d'),
'duration': format_duration(duration_seconds),
'notes': original_entry.get('notes', ''),
'title': original_entry.get('title', 'Untitled'),
'is_running': original_entry.get('is_running', False),
'project': {
'title': project_title,
'color': project_color,
# Include other project fields as needed
},
# Additional original fields as required
}
return processed_entry
# TIMELINE
@time.get("/time/line")
async def get_timing_timeline(
request: Request,
start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
# Retain these for processing timeline data with the correct timezone
queried_start_date = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=pacific).date()
queried_end_date = (datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=pacific).date()
if end_date else queried_start_date)
# Fetch and process timing data (localize first: fetch_and_prepare_timing_data expects datetimes, not strings)
start = localize_dt(start_date)
end = localize_dt(end_date) if end_date else None
timing_data = await fetch_and_prepare_timing_data(start, end)
# Process timeline data
timeline_formatted_data = process_timeline(timing_data, queried_start_date, queried_end_date)
return Response(content=timeline_formatted_data, media_type="text/markdown")
def process_timeline(timing_data, queried_start_date, queried_end_date):
timeline_output = []
entries_by_date = defaultdict(list)
for entry in timing_data:
# Convert start and end times to datetime objects and localize to Pacific timezone
start_datetime = datetime.strptime(entry['start_time'], '%Y-%m-%dT%H:%M:%S.%f%z').astimezone(pacific)
end_datetime = datetime.strptime(entry['end_time'], '%Y-%m-%dT%H:%M:%S.%f%z').astimezone(pacific)
project_title = truncate_project_title(entry['project']['title']) if entry.get('project') else 'No Project'
task_title = entry['title'] if entry.get('title') else 'Untitled'
# Check if the entry's date falls within the queried date range
if queried_start_date <= start_datetime.date() <= queried_end_date:
duration_seconds = (end_datetime - start_datetime).total_seconds()
duration_hours = format_duration(duration_seconds)
entries_by_date[start_datetime.date()].append(
(start_datetime.strftime('%H:%M:%S'), project_title, task_title, duration_hours)
)
# Sorting and outputting the timeline
for date, entries in sorted(entries_by_date.items()):
sorted_entries = sorted(entries, key=lambda x: x[0])
day_total_duration = sum(Decimal(entry[3]) for entry in sorted_entries)
if queried_start_date != queried_end_date:
timeline_output.append(f"## {date.strftime('%Y-%m-%d')} {date.strftime('%A')} [{day_total_duration}]\n")
for start_time, project, task, duration in sorted_entries:
timeline_output.append(f" - {start_time} {project} - {task} [{duration}]")
return "\n".join(timeline_output)
# CSV
@time.get("/time/csv")
async def get_timing_csv(
request: Request,
start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
# Fetch and process timing data (localize first: fetch_and_prepare_timing_data expects datetimes, not strings)
start = localize_dt(start_date)
end = localize_dt(end_date) if end_date else None
timing_data = await fetch_and_prepare_timing_data(start, end)
# Retain these for processing CSV data with the correct timezone
queried_start_date = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=pacific).date()
queried_end_date = (datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=pacific).date()
if end_date else queried_start_date)
# Process CSV data
csv_data = process_csv(timing_data, queried_start_date, queried_end_date)
if not csv_data or csv_data.strip() == "":
return Response(content="No CSV data available for the specified date range.", media_type="text/plain")
return Response(content=csv_data, media_type="text/csv")
def process_csv(timing_data, queried_start_date, queried_end_date):
project_task_data = defaultdict(lambda: defaultdict(list))
for entry in timing_data:
# Convert start and end times to datetime objects and localize to Pacific timezone
start_datetime = datetime.strptime(entry['start_time'], '%Y-%m-%dT%H:%M:%S.%f%z').astimezone(pacific)
end_datetime = datetime.strptime(entry['end_time'], '%Y-%m-%dT%H:%M:%S.%f%z').astimezone(pacific)
# Ensure the entry's date falls within the queried date range
if queried_start_date <= start_datetime.date() <= queried_end_date:
duration_seconds = (end_datetime - start_datetime).total_seconds()
duration_hours = format_duration(duration_seconds) # Convert duration to hours
project_title = truncate_project_title(entry['project']['title']) if 'title' in entry['project'] else 'No Project'
project_task_data[start_datetime.date()][project_title].append(
(entry['title'] if entry.get('title') else 'Untitled', duration_hours)
)
output = io.StringIO()
writer = csv.writer(output, delimiter='|', quotechar='"', quoting=csv.QUOTE_MINIMAL)
writer.writerow(['Date', 'Project', 'Task', 'Notes', 'Duration'])
for date, project_tasks in sorted(project_task_data.items()):
day_total_duration = Decimal(0)
formatted_date = date.strftime('%Y-%m-%d %a')
for project, tasks in sorted(project_tasks.items(), key=lambda item: project_sort_key(item[0])):
task_summary = defaultdict(Decimal)
for task, duration in tasks:
task_summary[task] += Decimal(duration)
project_duration = sum(task_summary.values()).quantize(Decimal('0.1'))
day_total_duration += project_duration
tasks_formatted = "; ".join([f"{task.replace(';', ',')} [{str(task_summary[task].quantize(Decimal('0.1')))}]" for task in task_summary])
writer.writerow([formatted_date, project, tasks_formatted, '', str(project_duration.quantize(Decimal('0.1')))])
writer.writerow([formatted_date, 'Day Total', '', '', str(day_total_duration.quantize(Decimal('0.1')))])
writer.writerow(['', '', '', '', ''])
return output.getvalue()
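# Sample of the pipe-delimited output (values invented); each day closes with a
# 'Day Total' row followed by a blank row:
#
#   Date|Project|Task|Notes|Duration
#   2024-06-21 Fri|ProjectA|Drafting [2.0]||2.0
#   2024-06-21 Fri|Day Total|||2.0
#   ||||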
# MARKDOWN
@time.get("/time/markdown3")
async def get_timing_markdown3(
request: Request,
start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
# Fetch and process timing data
start = localize_dt(start_date)
end = localize_dt(end_date) if end_date else None
timing_data = await fetch_and_prepare_timing_data(start, end)
# Retain these for processing Markdown data with the correct timezone
queried_start_date = start.replace(tzinfo=pacific).date()
queried_end_date = end.replace(tzinfo=pacific).date() if end else queried_start_date
# Process Markdown data
markdown_formatted_data = process_timing_markdown3(timing_data, queried_start_date, queried_end_date)
return Response(content=markdown_formatted_data, media_type="text/markdown")
def process_timing_markdown3(timing_data, queried_start_date, queried_end_date):
markdown_output = []
project_task_data = defaultdict(lambda: defaultdict(list))
for entry in timing_data:
# Convert start and end times to datetime objects and localize to Pacific timezone
start_datetime = datetime.strptime(entry['start_time'], '%Y-%m-%dT%H:%M:%S.%f%z').astimezone(pacific)
end_datetime = datetime.strptime(entry['end_time'], '%Y-%m-%dT%H:%M:%S.%f%z').astimezone(pacific)
# Check if the entry's date falls within the queried date range
if queried_start_date <= start_datetime.date() <= queried_end_date:
duration_seconds = (end_datetime - start_datetime).total_seconds()
duration_hours = format_duration(duration_seconds)
project_title = truncate_project_title(entry['project']['title']) if 'title' in entry['project'] else 'No Project'
project_task_data[start_datetime.date()][project_title].append(
(entry['title'] if entry.get('title') else 'Untitled', duration_hours)
)
for date, projects in sorted(project_task_data.items()):
day_total_duration = Decimal(0)
tasks_output = []
for project, tasks in sorted(projects.items(), key=lambda item: project_sort_key(item[0])):
task_summary = defaultdict(Decimal)
for task, duration in tasks:
task_summary[task] += Decimal(duration)
project_duration = sum(task_summary.values()).quantize(Decimal('0.1'))
day_total_duration += project_duration
tasks_formatted = "; ".join([f"{task.replace(';', ',')} [{duration}]" for task, duration in task_summary.items()])
tasks_output.append(f"- {project} - {tasks_formatted} - *{project_duration}*.")
if queried_start_date != queried_end_date:
markdown_output.append(f"## {date.strftime('%Y-%m-%d %A')} [{day_total_duration}]\n")
markdown_output.extend(tasks_output)
markdown_output.append("")
return "\n".join(markdown_output)
@time.get("/time/markdown")
async def get_timing_markdown(
request: Request,
start: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
end: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
start_date = localize_dt(start)
end_date = localize_dt(end) if end else None
markdown_formatted_data = await process_timing_markdown(start_date, end_date)
return Response(content=markdown_formatted_data, media_type="text/markdown")
#return JSONResponse(content={"markdown": markdown_formatted_data}, media_type="text/markdown")
async def process_timing_markdown(start_date: datetime, end_date: datetime): # timing_data, queried_start_date, queried_end_date)
timing_data = await fetch_and_prepare_timing_data(start_date, end_date)
queried_start_date = start_date.replace(tzinfo=pacific).date()
queried_end_date = (end_date.replace(tzinfo=pacific).date() if end_date else queried_start_date)
markdown_output = []
project_task_data = defaultdict(lambda: defaultdict(list))
# pacific = pytz.timezone('US/Pacific')
for entry in timing_data:
# Convert start and end times to datetime objects and localize to Pacific timezone
start_datetime = datetime.strptime(entry['start_time'], '%Y-%m-%dT%H:%M:%S.%f%z').astimezone(pacific)
end_datetime = datetime.strptime(entry['end_time'], '%Y-%m-%dT%H:%M:%S.%f%z').astimezone(pacific)
# Check if the entry's date falls within the queried date range
if queried_start_date <= start_datetime.date() <= queried_end_date:
duration_seconds = (end_datetime - start_datetime).total_seconds()
duration_hours = format_duration(duration_seconds)
project_title = truncate_project_title(entry['project']['title']) if 'title' in entry['project'] else 'No Project'
project_task_data[start_datetime.date()][project_title].append(
(entry['title'] if entry.get('title') else 'Untitled', duration_hours)
)
for date, projects in sorted(project_task_data.items()):
day_total_duration = Decimal(0)
tasks_output = []
for project, tasks in sorted(projects.items(), key=lambda item: project_sort_key(item[0])):
task_summary = defaultdict(Decimal)
for task, duration in tasks:
task_summary[task] += Decimal(duration)
project_duration = sum(task_summary.values()).quantize(Decimal('0.1'))
day_total_duration += project_duration
tasks_formatted = "; ".join([f"{task.replace(';', ',')} [{duration}]" for task, duration in task_summary.items()])
tasks_output.append(f"|{project}|{tasks_formatted}|{project_duration}|")
if queried_start_date != queried_end_date:
markdown_output.append(f"## {date.strftime('%Y-%m-%d %A')} [{day_total_duration}]\n")
tableheader = """|Project|Task(s)|Duration|
|-------|-------|-------:|"""
markdown_output.append(tableheader)
markdown_output.extend(tasks_output)
markdown_output.append(f"|TOTAL| |{day_total_duration}|\n")
markdown_output.append("")
return "\n".join(markdown_output)
#JSON
@time.get("/time/json")
async def get_timing_json(
request: Request,
start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
# Fetch and process timing data
    start = localize_dt(start_date)
    end = localize_dt(end_date) if end_date else None  # end_date is optional; localize_dt raises on None
timing_data = await fetch_and_prepare_timing_data(start, end)
# Convert processed data to the required JSON structure
json_data = process_json(timing_data)
return JSONResponse(content=json_data)
def process_json(timing_data):
structured_data = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
for entry in timing_data:
date_key = entry['start_date'] # Already in 'YYYY-MM-DD' format
project_title = entry['project']['title'] if 'title' in entry['project'] else 'No Project'
task_title = entry['title']
structured_data[date_key][project_title][task_title].append(entry)
return dict(structured_data)
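# The resulting structure nests date -> project -> task -> [raw timing entries], e.g. (invented values):
# {"2024-06-01": {"Acme v. Doe": {"Draft motion": [{...entry...}, ...]}}}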
# ROCKETMATTER CSV PARSING
def load_project_names(filename):
with open(filename, 'r', encoding='utf-8') as file:
return json.load(file)
def parse_input(fields, project_name_mappings, start_times_by_date):
project_code = fields[3].strip()
project_name = project_name_mappings.get(project_code, project_code)
task_descriptions = fields[4].strip()
billing_date_str = fields[6].strip()
total_hours = float(fields[9].strip())
billing_date = datetime.strptime(billing_date_str, "%m/%d/%Y").date()
# If no start time is recorded for this billing_date, default to 8 AM
if billing_date not in start_times_by_date:
start_time = pacific.localize(datetime.combine(billing_date, datetime.min.time()).replace(hour=8))
else:
start_time = start_times_by_date[billing_date]
# Normalize the task descriptions by converting line breaks and variations of task separators (],), (),)\s to standard form [,]
task_descriptions = re.sub(r'(\)|\])(\s+|$)(?=\[|\(|[A-Za-z])', '],', task_descriptions)
task_descriptions = re.sub(r'(\r?\n|\r)', ',', task_descriptions)
# Regex pattern to match task descriptions along with their respective durations.
task_pattern = re.compile(r'(.*?)[\[\(](\d+\.\d+)[\]\)]\s*,?')
tasks_with_durations = task_pattern.findall(task_descriptions)
tasks = []
total_calc_hours = 0
# Process tasks with explicit durations
for task in tasks_with_durations:
task_name, duration_hours = task[0].strip(' ,;'), float(task[1])
task_name = task_name if task_name else "Undefined Task"
tasks.append((task_name, duration_hours))
total_calc_hours += duration_hours
# If there are hours not accounted for, consider them for a task without a specific duration
remainder = total_hours - total_calc_hours
if remainder > 0:
# Include non-specific task or "Undefined Task"
non_duration_task = re.sub(task_pattern, '', task_descriptions).strip(' ,;')
if not non_duration_task:
non_duration_task = "Undefined Task"
tasks.append((non_duration_task, remainder))
# If no specific task durations are found in the description, treat the entire description as one task
if not tasks_with_durations:
task_name = task_descriptions if task_descriptions else "Undefined Task"
tasks.append((task_name, total_hours))
json_entries = []
for task_name, duration_hours in tasks:
duration = timedelta(hours=duration_hours)
end_time = start_time + duration
entry = {
"project": project_name,
"Task": task_name,
"Start_time": start_time.strftime("%Y-%m-%d %H:%M:%S-07:00"),
"End_time": end_time.strftime("%Y-%m-%d %H:%M:%S-07:00")
}
json_entries.append(entry)
start_time = end_time
# Update the start time for the billing_date in the dictionary
start_times_by_date[billing_date] = start_time
return json_entries
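# Illustrative example of the parsing above (invented CSV values): a description like
#   "Draft motion [1.5] Client call (0.5)" with total_hours 2.5
# yields tasks [("Draft motion", 1.5), ("Client call", 0.5), ("Undefined Task", 0.5)],
# each scheduled back-to-back from the day's running start_time.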
async def post_time_entry_to_timing(entry):
url = f"{TIMING_API_URL}/time-entries" # The URL for posting time entries
headers = {
"Authorization": f"Bearer {TIMING_API_KEY}",
"Content-Type": "application/json",
"Accept": "application/json",
'X-Time-Zone': 'America/Los_Angeles' # Set the timezone for the API request
}
data = {
"start_date": entry["Start_time"], # Format these according to the API's requirements
"end_date": entry["End_time"],
"project": entry["project"],
"title": entry["Task"],
"notes": "Automatically generated based on Rocketmatter reports.",
"replace_existing": False
}
    # httpx.post is synchronous; wrap the call in an AsyncClient so it can actually be awaited
    async with httpx.AsyncClient() as client:
        response = await client.post(url, headers=headers, json=data)
    return response.status_code, response.json()
@time.get("/time/flagemoji/{country_code}")
def flag_emoji(country_code: str):
offset = 127397
flag = ''.join(chr(ord(char) + offset) for char in country_code.upper())
return {"emoji": flag}
@time.head("/time/")
async def read_root():
return {}
@time.get("/time/")
async def root():
return {"message": "Ring, ring, ring, ring, ring, ring, ring. \n\n. Banana phone."}

406
sijapi/routers/tts.py Normal file
View file

@ -0,0 +1,406 @@
from fastapi import APIRouter, UploadFile, HTTPException, Response, Form, File, BackgroundTasks, Depends, Request
from fastapi.responses import StreamingResponse, FileResponse, PlainTextResponse
import requests
import json
import shutil
from io import BytesIO
import asyncio
from pydantic import BaseModel
from typing import Optional, Union, List
from pydub import AudioSegment
from TTS.api import TTS
from pathlib import Path
from datetime import datetime
from time import time
import torch
import traceback
import hashlib
import uuid
import httpx
import tempfile
import random
import re
import os
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOME_DIR, DATA_DIR, DEFAULT_VOICE, TTS_DIR, TTS_SEGMENTS_DIR, VOICE_DIR, PODCAST_DIR, TTS_OUTPUT_DIR, ELEVENLABS_API_KEY
from sijapi.utilities import sanitize_filename
### INITIALIZATIONS ###
tts = APIRouter(tags=["trusted", "private"])
DEVICE = torch.device('cpu')
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
@tts.get("/tts/local_voices", response_model=List[str])
async def list_wav_files():
wav_files = [file.split('.')[0] for file in os.listdir(VOICE_DIR) if file.endswith(".wav")]
return wav_files
@tts.get("/tts/elevenlabs_voices")
async def list_11l_voices():
formatted_list = ""
url = "https://api.elevenlabs.io/v1/voices"
headers = {"xi-api-key": ELEVENLABS_API_KEY}
async with httpx.AsyncClient() as client:
try:
response = await client.get(url, headers=headers)
DEBUG(f"Response: {response}")
if response.status_code == 200:
voices_data = response.json().get("voices", [])
formatted_list = ""
for voice in voices_data:
name = voice["name"]
id = voice["voice_id"]
formatted_list += f"{name}: `{id}`\n"
except Exception as e:
ERR(f"Error determining voice ID: {str(e)}")
return PlainTextResponse(formatted_list, status_code=200)
def select_voice(voice_name: str) -> str:
try:
voice_file = VOICE_DIR / f"{voice_name}.wav"
DEBUG(f"select_voice received query to use voice: {voice_name}. Looking for {voice_file} inside {VOICE_DIR}.")
if voice_file.is_file():
return str(voice_file)
else:
raise HTTPException(status_code=404, detail="Voice file not found")
except Exception as e:
ERR(f"Voice file not found: {str(e)}")
ERR(traceback.format_exc())
raise HTTPException(status_code=404, detail="Voice file not found")
@tts.post("/tts/speak")
@tts.post("/v1/audio/speech")
async def generate_speech_endpoint(
request: Request,
background_tasks: BackgroundTasks,
model: str = Form("eleven_turbo_v2"),
text: Optional[str] = Form(None),
file: Optional[UploadFile] = File(None),
voice: Optional[str] = Form(None),
voice_file: Optional[UploadFile] = File(None),
speed: Optional[float] = Form(1.1),
podcast: Union[bool, str] = Form(False),
stream: bool = Form(True)
):
try:
podcast = podcast if isinstance(podcast, bool) else podcast.lower() == 'true'
text_content = await get_text_content(text, file)
if stream:
model = model if model else await get_model(voice, voice_file)
if model == "eleven_turbo_v2":
voice_id = await determine_voice_id(voice)
audio_stream = await get_audio_stream(model, text_content, voice_id)
return StreamingResponse(audio_stream, media_type="audio/mpeg")
else:
return await stream_tts(text_content, speed, voice, voice_file)
else:
return await generate_speech(background_tasks, text_content, voice, voice_file, model, speed, podcast)
except Exception as e:
ERR(f"Error in TTS: {str(e)}")
ERR(traceback.format_exc())
        raise HTTPException(status_code=500, detail="error in TTS")  # 666 is not a valid HTTP status code
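# Usage sketch (illustrative form fields; endpoint defined above):
#   curl -X POST "http://localhost:4444/tts/speak" \
#        -H "Authorization: Bearer $GLOBAL_API_KEY" \
#        -F "text=Hello world" -F "voice=alloy" -F "stream=false"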
async def generate_speech(
background_tasks: BackgroundTasks,
text: str,
voice: str = None,
voice_file: UploadFile = None,
model: str = None,
speed: float = 1.1,
podcast: bool = False,
title: str = None,
output_dir = None
) -> str:
output_dir = Path(output_dir) if output_dir else TTS_OUTPUT_DIR
if not output_dir.exists():
output_dir.mkdir(parents=True)
try:
model = model if model else await get_model(voice, voice_file)
if model == "eleven_turbo_v2":
INFO(f"Using ElevenLabs.")
audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir)
return str(audio_file_path)
elif model == "xtts":
INFO(f"Using XTTS2")
final_output_dir = await local_tts(text, speed, voice, voice_file, podcast, background_tasks, title, output_dir)
background_tasks.add_task(os.remove, str(final_output_dir))
return str(final_output_dir)
else:
raise HTTPException(status_code=400, detail="Invalid model specified")
except HTTPException as e:
ERR(f"HTTP error: {e}")
ERR(traceback.format_exc())
raise e
except Exception as e:
ERR(f"Error: {e}")
ERR(traceback.format_exc())
raise e
async def get_model(voice: str = None, voice_file: UploadFile = None):
if voice_file or (voice and select_voice(voice)):
return "xtts"
elif voice and await determine_voice_id(voice):
return "eleven_turbo_v2"
else:
raise HTTPException(status_code=400, detail="No model or voice specified")
async def determine_voice_id(voice_name: str) -> str:
hardcoded_voices = {
"alloy": "E3A1KVbKoWSIKSZwSUsW",
"echo": "b42GBisbu9r5m5n6pHF7",
"fable": "KAX2Y6tTs0oDWq7zZXW7",
"onyx": "clQb8NxY08xZ6mX6wCPE",
"nova": "6TayTBKLMOsghG7jYuMX",
"shimmer": "E7soeOyjpmuZFurvoxZ2",
DEFAULT_VOICE: "6TayTBKLMOsghG7jYuMX",
"Sangye": "E7soeOyjpmuZFurvoxZ2",
"Herzog": "KAX2Y6tTs0oDWq7zZXW7",
"Attenborough": "b42GBisbu9r5m5n6pHF7"
}
if voice_name in hardcoded_voices:
voice_id = hardcoded_voices[voice_name]
DEBUG(f"Found voice ID - {voice_id}")
return voice_id
DEBUG(f"Requested voice not among the hardcoded options.. checking with 11L next.")
url = "https://api.elevenlabs.io/v1/voices"
headers = {"xi-api-key": ELEVENLABS_API_KEY}
async with httpx.AsyncClient() as client:
try:
response = await client.get(url, headers=headers)
DEBUG(f"Response: {response}")
if response.status_code == 200:
voices_data = response.json().get("voices", [])
for voice in voices_data:
if voice_name == voice["voice_id"] or voice_name == voice["name"]:
return voice["voice_id"]
except Exception as e:
ERR(f"Error determining voice ID: {str(e)}")
return "6TayTBKLMOsghG7jYuMX"
async def elevenlabs_tts(model: str, input_text: str, voice: str, title: str = None, output_dir: str = None):
voice_id = await determine_voice_id(voice)
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
payload = {
"text": input_text,
"model_id": model
}
headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
async with httpx.AsyncClient() as client:
response = await client.post(url, json=payload, headers=headers)
output_dir = output_dir if output_dir else TTS_OUTPUT_DIR
title = title if title else datetime.now().strftime("%Y%m%d%H%M%S")
filename = f"{sanitize_filename(title)}.mp3"
file_path = Path(output_dir) / filename
if response.status_code == 200:
with open(file_path, "wb") as audio_file:
audio_file.write(response.content)
return file_path
else:
raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> str:
if file:
return (await file.read()).decode("utf-8").strip()
elif text:
return text.strip()
else:
raise HTTPException(status_code=400, detail="No text provided")
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
if voice:
return select_voice(voice)
elif voice_file and isinstance(voice_file, UploadFile):
VOICE_DIR.mkdir(exist_ok=True)
content = await voice_file.read()
checksum = hashlib.md5(content).hexdigest()
existing_file = VOICE_DIR / voice_file.filename
if existing_file.is_file():
with open(existing_file, 'rb') as f:
existing_checksum = hashlib.md5(f.read()).hexdigest()
if checksum == existing_checksum:
return str(existing_file)
base_name = existing_file.stem
counter = 1
new_file = existing_file
while new_file.is_file():
new_file = VOICE_DIR / f"{base_name}{counter:02}.wav"
counter += 1
with open(new_file, 'wb') as f:
f.write(content)
return str(new_file)
else:
DEBUG(f"{datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}")
return select_voice(DEFAULT_VOICE)
async def local_tts(text_content: str, speed: float, voice: str, voice_file = None, podcast: bool = False, background_tasks: BackgroundTasks = None, title: str = None, output_path: Optional[Path] = None) -> str:
if output_path:
file_path = Path(output_path)
else:
datetime_str = datetime.now().strftime("%Y%m%d%H%M%S")
title = sanitize_filename(title) if title else "Audio"
filename = f"{datetime_str}_{title}.wav"
file_path = TTS_OUTPUT_DIR / filename
# Ensure the parent directory exists
file_path.parent.mkdir(parents=True, exist_ok=True)
voice_file_path = await get_voice_file_path(voice, voice_file)
XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
segments = split_text(text_content)
combined_audio = AudioSegment.silent(duration=0)
for i, segment in enumerate(segments):
segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
DEBUG(f"Segment file path: {segment_file_path}")
segment_file = await asyncio.to_thread(XTTS.tts_to_file, text=segment, speed=speed, file_path=str(segment_file_path), speaker_wav=[voice_file_path], language="en")
DEBUG(f"Segment file generated: {segment_file}")
combined_audio += AudioSegment.from_wav(str(segment_file))
# Delete the segment file immediately after adding it to the combined audio
segment_file_path.unlink()
if podcast:
podcast_file_path = PODCAST_DIR / file_path.name
combined_audio.export(podcast_file_path, format="wav")
combined_audio.export(file_path, format="wav")
return str(file_path)
async def stream_tts(text_content: str, speed: float, voice: str, voice_file) -> StreamingResponse:
voice_file_path = await get_voice_file_path(voice, voice_file)
segments = split_text(text_content)
async def audio_stream_generator():
for segment in segments:
segment_file = await generate_tts(segment, speed, voice_file_path)
with open(segment_file, 'rb') as f:
while chunk := f.read(1024):
yield chunk
os.remove(segment_file)
return StreamingResponse(audio_stream_generator(), media_type='audio/wav')
async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
    # mkstemp instead of the insecure, deprecated tempfile.mktemp; close the fd so TTS can write to the path
    fd, output_path = tempfile.mkstemp(suffix=".wav", dir=tempfile.gettempdir())
    os.close(fd)
    XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
    # tts_to_file is blocking, so run it in a thread to avoid stalling the event loop
    await asyncio.to_thread(XTTS.tts_to_file, text=text, speed=speed, file_path=output_path, speaker_wav=[voice_file_path], language="en")
    return output_path
async def get_audio_stream(model: str, input_text: str, voice: str):
voice_id = await determine_voice_id(voice)
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
payload = {
"text": input_text,
"model_id": "eleven_turbo_v2"
}
headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
    # stream=True so iter_content yields chunks as they arrive instead of buffering the full body
    response = requests.post(url, json=payload, headers=headers, stream=True)
if response.status_code == 200:
return response.iter_content(1024)
else:
raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
def split_text(text, target_length=35, max_length=50):
text = clean_text_for_tts(text)
sentences = re.split(r'(?<=[.!?"])\s+', text)
segments = []
current_segment = []
for sentence in sentences:
sentence_words = sentence.split()
segment_length = len(' '.join(current_segment).split())
if segment_length + len(sentence_words) > max_length:
segments.append(' '.join(current_segment))
DEBUG(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}")
current_segment = [sentence]
else:
current_segment.extend(sentence_words)
if current_segment:
segments.append(' '.join(current_segment))
DEBUG(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
return segments
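# NB: segment sizes are measured in words, not characters; e.g. forty short sentences of
# ten words each would be packed into eight ~50-word segments (illustrative arithmetic).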
def clean_text_for_tts(text: str) -> str:
    if text is not None:
        text = text.replace("\n", " ").replace("\r", " ")
        text = re.sub(r"[^\w\s.,;:!?'\"]", '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text
    else:
        DEBUG("No text received.")
        return ""  # return an empty string rather than None so callers can safely split/iterate
def copy_to_podcast_dir(file_path):
try:
# Extract the file name from the file path
file_name = Path(file_path).name
# Construct the destination path in the PODCAST_DIR
destination_path = PODCAST_DIR / file_name
# Copy the file to the PODCAST_DIR
shutil.copy(file_path, destination_path)
print(f"File copied successfully to {destination_path}")
except FileNotFoundError:
print(f"File not found: {file_path}")
except shutil.SameFileError:
print(f"Source and destination are the same file: {file_path}")
except PermissionError:
print(f"Permission denied while copying the file: {file_path}")
except Exception as e:
print(f"An error occurred while copying the file: {file_path}")
print(f"Error details: {str(e)}")

265
sijapi/routers/weather.py Normal file
View file

@ -0,0 +1,265 @@
import asyncio
from fastapi import APIRouter, HTTPException
from fastapi import HTTPException
from asyncpg.cursor import Cursor
from httpx import AsyncClient
from typing import Dict
from datetime import datetime
from shapely.wkb import loads
from binascii import unhexlify
from sijapi.utilities import localize_dt
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import VISUALCROSSING_API_KEY, TZ
from sijapi.utilities import get_db_connection, haversine
from sijapi.routers import locate
weather = APIRouter()
async def get_weather(date_time: datetime, latitude: float, longitude: float):
# request_date_str = date_time.strftime("%Y-%m-%d")
DEBUG(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}")
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
fetch_new_data = True
if daily_weather_data:
try:
DEBUG(f"Daily weather data from db: {daily_weather_data}")
last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
last_updated = localize_dt(last_updated)
stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
stored_loc = loads(stored_loc_data)
stored_lat = stored_loc.y
stored_lon = stored_loc.x
stored_ele = stored_loc.z
hourly_weather = daily_weather_data.get('HourlyWeather')
DEBUG(f"Hourly: {hourly_weather}")
DEBUG(f"\nDEBUG:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n")
request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
DEBUG(f"\nDEBUG:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
if last_updated and (date_time <= datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
DEBUG(f"We can use existing data... :')")
fetch_new_data = False
except Exception as e:
ERR(f"Error in get_weather: {e}")
if fetch_new_data:
DEBUG(f"We require new data!")
request_date_str = date_time.strftime("%Y-%m-%d")
url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
try:
async with AsyncClient() as client:
response = await client.get(url)
if response.status_code == 200:
DEBUG(f"Successfully obtained data from VC...")
try:
weather_data = response.json()
store_result = await store_weather_to_db(date_time, weather_data)
if store_result == "SUCCESS":
DEBUG(f"New weather data for {request_date_str} stored in database...")
else:
ERR(f"Failed to store weather data for {request_date_str} in database! {store_result}")
DEBUG(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}")
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
if daily_weather_data is not None:
return daily_weather_data
else:
raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
except Exception as e:
ERR(f"Problem parsing VC response or storing data: {e}")
raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
else:
ERR(f"Failed to fetch weather data: {response.status_code}, {response.text}")
except Exception as e:
ERR(f"Exception during API call: {e}")
return daily_weather_data
async def store_weather_to_db(date_time: datetime, weather_data: dict):
conn = await get_db_connection()
try:
day_data = weather_data.get('days')[0]
DEBUG(f"day_data.get('sunrise'): {day_data.get('sunrise')}")
# Handle preciptype and stations as PostgreSQL arrays
preciptype_array = day_data.get('preciptype', []) or []
stations_array = day_data.get('stations', []) or []
date_str = date_time.strftime("%Y-%m-%d")
# Get location details from weather data if available
longitude = weather_data.get('longitude')
latitude = weather_data.get('latitude')
        elevation = locate.get_elevation(latitude, longitude)  # VC payload has no reliable elevation, so look it up separately
location_point = f"POINTZ({longitude} {latitude} {elevation})" if longitude and latitude and elevation else None
# Correct for the datetime objects
        day_data['datetime'] = localize_dt(day_data.get('datetime'))
day_data['sunrise'] = day_data['datetime'].replace(hour=int(day_data.get('sunrise').split(':')[0]), minute=int(day_data.get('sunrise').split(':')[1]))
day_data['sunset'] = day_data['datetime'].replace(hour=int(day_data.get('sunset').split(':')[0]), minute=int(day_data.get('sunset').split(':')[1]))
daily_weather_params = (
day_data.get('sunrise'), day_data.get('sunriseEpoch'),
day_data.get('sunset'), day_data.get('sunsetEpoch'),
day_data.get('description'), day_data.get('tempmax'),
day_data.get('tempmin'), day_data.get('uvindex'),
day_data.get('winddir'), day_data.get('windspeed'),
day_data.get('icon'), datetime.now(),
day_data.get('datetime'), day_data.get('datetimeEpoch'),
day_data.get('temp'), day_data.get('feelslikemax'),
day_data.get('feelslikemin'), day_data.get('feelslike'),
day_data.get('dew'), day_data.get('humidity'),
day_data.get('precip'), day_data.get('precipprob'),
day_data.get('precipcover'), preciptype_array,
day_data.get('snow'), day_data.get('snowdepth'),
day_data.get('windgust'), day_data.get('pressure'),
day_data.get('cloudcover'), day_data.get('visibility'),
day_data.get('solarradiation'), day_data.get('solarenergy'),
day_data.get('severerisk', 0), day_data.get('moonphase'),
day_data.get('conditions'), stations_array, day_data.get('source'),
location_point
)
    except Exception as e:
        ERR(f"Failed to prepare database query in store_weather_to_db! {e}")
        return f"FAILURE: {e}"  # bail out here; daily_weather_params is undefined past this point
try:
daily_weather_query = '''
INSERT INTO DailyWeather (
sunrise, sunriseEpoch, sunset, sunsetEpoch, description,
tempmax, tempmin, uvindex, winddir, windspeed, icon, last_updated,
datetime, datetimeEpoch, temp, feelslikemax, feelslikemin, feelslike,
dew, humidity, precip, precipprob, precipcover, preciptype,
snow, snowdepth, windgust, pressure, cloudcover, visibility,
solarradiation, solarenergy, severerisk, moonphase, conditions,
stations, source, location
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38)
RETURNING id
'''
# Debug logs for better insights
# DEBUG("Executing query: %s", daily_weather_query)
# DEBUG("With parameters: %s", daily_weather_params)
# Execute the query to insert daily weather data
async with conn.transaction():
daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params)
if 'hours' in day_data:
for hour_data in day_data['hours']:
try:
                    await asyncio.sleep(0.1)  # must be awaited; a bare asyncio.sleep() call does nothing
# hour_data['datetime'] = parse_date(hour_data.get('datetime'))
hour_timestamp = date_str + ' ' + hour_data['datetime']
hour_data['datetime'] = localize_dt(hour_timestamp)
DEBUG(f"Processing hours now...")
DEBUG(f"Processing {hour_data['datetime']}")
hour_preciptype_array = hour_data.get('preciptype', []) or []
hour_stations_array = hour_data.get('stations', []) or []
hourly_weather_params = (
daily_weather_id,
hour_data['datetime'],
hour_data.get('datetimeEpoch'),
hour_data['temp'],
hour_data['feelslike'],
hour_data['humidity'],
hour_data['dew'],
hour_data['precip'],
hour_data['precipprob'],
hour_preciptype_array,
hour_data['snow'],
hour_data['snowdepth'],
hour_data['windgust'],
hour_data['windspeed'],
hour_data['winddir'],
hour_data['pressure'],
hour_data['cloudcover'],
hour_data['visibility'],
hour_data['solarradiation'],
hour_data['solarenergy'],
hour_data['uvindex'],
hour_data.get('severerisk', 0),
hour_data['conditions'],
hour_data['icon'],
hour_stations_array,
hour_data.get('source', ''),
)
try:
hourly_weather_query = '''
INSERT INTO HourlyWeather (daily_weather_id, datetime, datetimeEpoch, temp, feelslike, humidity, dew, precip, precipprob,
preciptype, snow, snowdepth, windgust, windspeed, winddir, pressure, cloudcover, visibility, solarradiation, solarenergy,
uvindex, severerisk, conditions, icon, stations, source)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26)
RETURNING id
'''
# Debug logs for better insights
# DEBUG("Executing query: %s", hourly_weather_query)
# DEBUG("With parameters: %s", hourly_weather_params)
# Execute the query to insert hourly weather data
async with conn.transaction():
hourly_weather_id = await conn.fetchval(hourly_weather_query, *hourly_weather_params)
# ERR(f"\n{hourly_weather_id}")
except Exception as e:
ERR(f"EXCEPTION: {e}")
except Exception as e:
ERR(f"EXCEPTION: {e}")
return "SUCCESS"
except Exception as e:
ERR(f"Error in dailyweather storage: {e}")
async def get_weather_from_db(date_time: datetime, latitude: float, longitude: float):
conn = await get_db_connection()
query_date = date_time.date()
try:
# Query to get daily weather data
query = '''
SELECT DW.* FROM DailyWeather DW
WHERE DW.datetime::date = $1
AND ST_DWithin(DW.location::geography, ST_MakePoint($2,$3)::geography, 8046.72)
            ORDER BY ST_Distance(DW.location::geography, ST_MakePoint($4, $5)::geography) ASC
LIMIT 1
'''
daily_weather_data = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude)
if daily_weather_data is None:
DEBUG(f"No daily weather data retrieved from database.")
return None
else:
DEBUG(f"Daily_weather_data: {daily_weather_data}")
# Query to get hourly weather data
query = '''
SELECT HW.* FROM HourlyWeather HW
WHERE HW.daily_weather_id = $1
'''
hourly_weather_data = await conn.fetch(query, daily_weather_data['id'])
day: Dict = {
'DailyWeather': dict(daily_weather_data),
'HourlyWeather': [dict(row) for row in hourly_weather_data],
}
DEBUG(f"day: {day}")
return day
except Exception as e:
ERR(f"Unexpected error occurred: {e}")

52
sijapi/tztest.py Normal file
View file

@ -0,0 +1,52 @@
import os
import pandas as pd
from typing import Optional
from scipy.spatial import cKDTree
import numpy as np
from dotenv import load_dotenv
from dateutil import tz
from pathlib import Path
BASE_DIR = Path(__file__).resolve().parent
CONFIG_DIR = BASE_DIR / "config"
ENV_PATH = CONFIG_DIR / ".env"
DATA_DIR = BASE_DIR / "data"
GEONAMES_TXT = DATA_DIR / "geonames.txt"
load_dotenv(ENV_PATH)
def load_geonames_data(path: str):
columns = ['geonameid', 'name', 'asciiname', 'alternatenames',
'latitude', 'longitude', 'feature_class', 'feature_code',
'country_code', 'cc2', 'admin1_code', 'admin2_code', 'admin3_code',
'admin4_code', 'population', 'elevation', 'dem', 'timezone', 'modification_date']
data = pd.read_csv(
path,
sep='\t',
header=None,
names=columns,
low_memory=False
)
return data
def find_timezone(data: pd.DataFrame, lat: float, lon: float, country_code: Optional[str] = None):
if country_code:
data = data[data['country_code'] == country_code]
# build the spatial index (KDTree)
spatial_index = cKDTree(data[['latitude', 'longitude']].values)
# find the closest index in our spatial index for each point on the grid
_, idx = spatial_index.query([(lat, lon)], k=1)
# get timezone for the closest geonameid
timezone = data.iloc[idx]['timezone'].values[0]
return timezone
df = load_geonames_data(GEONAMES_TXT)
timezone = find_timezone(df, 42.5, 1.5, 'AD')
print(timezone)
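# Expected output for (42.5, 1.5, 'AD'), assuming geonames.txt includes the Andorra
# extract: Europe/Andorra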

427
sijapi/utilities.py Normal file
View file

@ -0,0 +1,427 @@
import re
import os
from fastapi import Form
import re
import io
from io import BytesIO
import base64
import math
from dateutil import parser
from pathlib import Path
import filetype
from PyPDF2 import PdfReader
from pdfminer.high_level import extract_text as pdfminer_extract_text
import pytesseract
from pdf2image import convert_from_path
from datetime import datetime, date, time
from typing import Optional, Union, Tuple
import asyncio
from PIL import Image
from dateutil.parser import parse as dateutil_parse
from docx import Document
import asyncpg
from sshtunnel import SSHTunnelForwarder
from fastapi import Depends, HTTPException, Request, UploadFile
from fastapi.security.api_key import APIKeyHeader
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import DB, GLOBAL_API_KEY, DB, DB_HOST, DB_PORT, DB_USER, DB_PASS, TZ, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR
api_key_header = APIKeyHeader(name="Authorization")
def validate_api_key(request: Request, api_key: str = Depends(api_key_header)):
if request.url.path not in ["/health", "/ip", "/pgp"]:
api_key_query = request.query_params.get("api_key")
        if api_key:  # was `if api_key_header:`, which is always truthy (it's the scheme object, not the header value)
            api_key = api_key.lower().split("bearer ")[-1]
if api_key != GLOBAL_API_KEY and api_key_query != GLOBAL_API_KEY:
raise HTTPException(status_code=401, detail="Invalid or missing API key")
def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str = None, extension: str = None, no_timestamp: bool = False) -> Tuple[Path, Path]:
'''
Obsidian helper. Takes a datetime and optional subdirectory name, filename, and extension.
If an extension is provided, it ensures the path is to a file with that extension.
If no extension is provided, it treats the path as a directory.
'''
year = date_time.strftime(YEAR_FMT)
month = date_time.strftime(MONTH_FMT)
day = date_time.strftime(DAY_FMT)
day_short = date_time.strftime(DAY_SHORT_FMT)
timestamp = date_time.strftime("%H%M%S")
relative_path = Path("journal") / year / month / day
if not subdir and not filename and not extension:
# standard daily note handler, where only the date_time was specified:
relative_path = relative_path / f"{day}.md"
else:
if subdir:
# datestamped subdirectory handler
relative_path = relative_path / f"{day_short} {subdir}"
if filename:
if no_timestamp:
filename = f"{day_short} {sanitize_filename(filename)}"
else:
filename = f"{day_short} {timestamp} {sanitize_filename(filename)}"
if extension:
extension = extension if extension.startswith(".") else f".{extension}"
filename = f"{filename}{extension}" if not filename.endswith(extension) else filename
relative_path = relative_path / filename
absolute_path = OBSIDIAN_VAULT_DIR / relative_path
os.makedirs(absolute_path.parent, exist_ok=True)
return absolute_path, relative_path
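# Illustrative result, assuming YEAR_FMT="%Y", MONTH_FMT="%Y-%m", DAY_FMT="%Y-%m-%d",
# and DAY_SHORT_FMT="%m-%d" (the actual formats come from the environment):
#   assemble_journal_path(datetime(2024, 6, 23, 14, 30), "Notes", "Meeting", "md", no_timestamp=True)
#   -> relative_path == journal/2024/2024-06/2024-06-23/06-23 Notes/06-23 Meeting.md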
def prefix_lines(text: str, prefix: str = '> ') -> str:
lines = text.split('\n')
prefixed_lines = [f"{prefix}{line.lstrip()}" for line in lines]
return '\n'.join(prefixed_lines)
def f(file):
    # Normalize `file` into something readable: pass through file-like objects and raw bytes,
    # and open paths/strings. NB: the caller is responsible for closing the returned handle;
    # the previous `with open(...)` returned an already-closed file object.
    if hasattr(file, 'read') and callable(file.read):
        return file
    if isinstance(file, (bytes, bytearray)):
        return file
    if isinstance(file, Path):
        file_path = file
    elif isinstance(file, str):
        file_path = Path(file)
    else:
        raise TypeError("Invalid file type. Expected str, Path, or file-like object.")
    return open(file_path, 'rb')
def get_extension(file):
try:
if isinstance(file, str):
file_path = Path(file)
elif isinstance(file, Path):
file_path = file
else:
file_path = Path(file.filename)
file_extension = file_path.suffix
return file_extension
except Exception as e:
ERR(f"Unable to get extension of {file}")
raise e
def sanitize_filename(text, max_length=255):
"""Sanitize a string to be used as a safe filename."""
DEBUG(f"Filename before sanitization: {text}")
sanitized = re.sub(r'[^\w\s\.-]', '', text).strip()
final_filename = sanitized[:max_length]
DEBUG(f"Filename after sanitization: {final_filename}")
return final_filename
def bool_convert(value: str = Form(None)):
    return value is not None and value.lower() in ["true", "1", "t", "y", "yes"]
def str_to_bool(value: str) -> bool:
    """
    Convert a string to a boolean.
    Interprets 'true', '1', 'yes', 'y' as True.
    Interprets 'false', '0', 'no', 'n', '', or any other string as False.
    """
    return value.strip().lower() in ("true", "1", "yes", "y") if value else False
def get_timestamp():
return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
async def extract_text(file_path: str) -> str:
    """Extract text from file."""
    if file_path.endswith('.pdf'):
        return await extract_text_from_pdf(file_path)
    elif file_path.endswith('.docx'):
        return await extract_text_from_docx(file_path)
    else:
        # fall back to plain-text reading rather than silently returning None
        return await read_text_file(file_path)
def clean_text(text):
text = text.replace('-', '')
text = re.sub(r'\s+', ' ', text)
text = re.sub(r'[\u200B-\u200D\uFEFF]', '', text)
return text.strip()
async def ocr_pdf(file_path: str) -> str:
try:
images = await asyncio.to_thread(convert_from_path, file_path)
texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, image) for image in images))
return ' '.join(texts)
except Exception as e:
ERR(f"Error during OCR: {str(e)}")
return ""
# NOTE: this heuristic variant was silently shadowed by the second extract_text_from_pdf
# definition below; renamed so both remain importable and the override is explicit.
async def extract_text_from_pdf_with_heuristic(file_path: str) -> str:
if not await is_valid_pdf(file_path):
ERR(f"Invalid PDF file: {file_path}")
return ""
text = ''
num_pages = 0
# First, attempt to extract text using PyPDF2
try:
reader = await asyncio.to_thread(PdfReader, file_path)
for page in reader.pages:
text_content = page.extract_text() + ' ' if page.extract_text() else ''
text += text_content
num_pages = len(reader.pages)
# If text was extracted successfully and it's deemed sufficient, return it
if text and not should_use_ocr(text, num_pages):
return clean_text(text)
except Exception as e:
ERR(f"Error extracting text with PyPDF2: {str(e)}")
# If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six
try:
text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages):
return clean_text(text_pdfminer)
except Exception as e:
ERR(f"Error extracting text with pdfminer.six: {e}")
# If both methods fail or are deemed insufficient, use OCR as the last resort
INFO("Falling back to OCR for text extraction...")
return await ocr_pdf(file_path)
async def is_valid_pdf(file_path: str) -> bool:
"""Check if the file at file_path is a valid PDF."""
try:
kind = filetype.guess(file_path)
return kind.mime == 'application/pdf'
except Exception as e:
ERR(f"Error checking file type: {e}")
return False
async def extract_text_from_pdf(file_path: str) -> str:
if not await is_valid_pdf(file_path):
WARN(f"Invalid PDF file: {file_path}")
return ""
text = ''
try:
reader = await asyncio.to_thread(PdfReader, file_path)
for page in reader.pages:
text_content = page.extract_text() + ' ' if page.extract_text() else ''
text += text_content
if text.strip(): # Successfully extracted text
return clean_text(text)
except Exception as e:
ERR(f"Error extracting text with PyPDF2: {str(e)}")
try:
text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
if text_pdfminer.strip(): # Successfully extracted text
return clean_text(text_pdfminer)
except Exception as e:
ERR(f"Error extracting text with pdfminer.six: {str(e)}")
# Fall back to OCR
INFO("Falling back to OCR for text extraction...")
try:
images = convert_from_path(file_path)
ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images))
return ' '.join(ocr_texts).strip()
except Exception as e:
WARN(f"OCR failed: {str(e)}")
return ""
async def extract_text_from_docx(file_path: str) -> str:
def read_docx(file_path):
doc = Document(file_path)
full_text = [paragraph.text for paragraph in doc.paragraphs]
return '\n'.join(full_text)
return await asyncio.to_thread(read_docx, file_path)
# Correcting read_text_file to be asynchronous
async def read_text_file(file_path: str) -> str:
# This opens and reads a file asynchronously by offloading to a separate thread
return await asyncio.to_thread(_sync_read_text_file, file_path)
def _sync_read_text_file(file_path: str) -> str:
# Actual synchronous file reading operation
with open(file_path, 'r', encoding='utf-8') as file:
return file.read()
def should_use_ocr(text, num_pages) -> bool:
    if not text or not num_pages:
        return True  # no text (or no countable pages, avoiding division by zero): OCR is the only option left
    word_count = len(text.split())
    avg_words_per_page = word_count / num_pages
    return avg_words_per_page < 10
def convert_to_unix_time(iso_date_str):
dt = parser.parse(iso_date_str) # Automatically parses datetime with timezone
return int(dt.timestamp())
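# e.g. convert_to_unix_time("2024-06-23T00:19:36-07:00") -> 1719127176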
async def get_db_connection():
conn = await asyncpg.connect(
database=DB,
user=DB_USER,
password=DB_PASS,
host=DB_HOST,
port=DB_PORT
)
return conn
temp = """
def get_db_connection_ssh(ssh: bool = True):
if ssh:
with SSHTunnelForwarder(
(DB_SSH, 22),
        ssh_username=DB_SSH_USER,
        ssh_password=DB_SSH_PASS,
        remote_bind_address=(DB_SSH, 5432),  # assumed remote Postgres port; the original passed a bare host
        local_bind_address=(DB_HOST, DB_PORT)
) as tunnel: conn = psycopg2.connect(
dbname=DB,
user=DB_USER,
password=DB_PASS,
host=DB_HOST,
port=DB_PORT
)
else:
conn = psycopg2.connect(
dbname=DB,
user=DB_USER,
password=DB_PASS,
host=DB_HOST,
port=DB_PORT
)
return conn
"""
async def db_localized():
    # ssh = True if TS_IP == DB_SSH else False
    return await get_db_connection()  # get_db_connection is a coroutine, so this wrapper must await it
def haversine(lat1, lon1, lat2, lon2):
""" Calculate the great circle distance between two points on the earth specified in decimal degrees. """
lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])
dlat = lat2 - lat1
dlon = lon2 - lon1
a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
c = 2 * math.asin(math.sqrt(a))
r = 6371 # Radius of Earth in kilometers
return c * r
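# Quick check (illustrative): haversine(45.5152, -122.6784, 45.5231, -122.6765) ≈ 0.9 km,
# well under the 8 km reuse threshold applied in the weather router.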
def convert_degrees_to_cardinal(d):
"""
Convert degrees to cardinal directions
"""
dirs = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"]
ix = round(d / (360. / len(dirs)))
return dirs[ix % len(dirs)]
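# Each sector spans 22.5°; e.g. convert_degrees_to_cardinal(202.5) -> "SSW"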
def localize_dt(dt):
initial_dt = dt
try:
if isinstance(dt, str):
dt = dateutil_parse(dt)
DEBUG(f"{initial_dt} was a string so we attempted converting to datetime. Result: {dt}")
if isinstance(dt, datetime):
DEBUG(f"{dt} is a datetime object, so we will ensure it is tz-aware.")
if dt.tzinfo is None:
dt = dt.replace(tzinfo=TZ)
# DEBUG(f"{dt} should now be tz-aware. Returning it now.")
return dt
else:
# DEBUG(f"{dt} already was tz-aware. Returning it now.")
return dt
else:
ERR(f"Conversion failed")
raise TypeError("Conversion failed")
except Exception as e:
ERR(f"Error parsing datetime: {e}")
raise TypeError("Input must be a string or datetime object")
HOURLY_COLUMNS_MAPPING = {
"12am": "00:00:00",
"2am": "02:00:00",
"4am": "04:00:00",
"6am": "06:00:00",
"8am": "08:00:00",
"10am": "10:00:00",
"12pm": "12:00:00",
"2pm": "14:00:00",
"4pm": "16:00:00",
"6pm": "18:00:00",
"8pm": "20:00:00",
"10pm": "22:00:00",
}
def convert_to_12_hour_format(datetime_obj_or_str):
    # Normalize the input (datetime, time, or string) to an "HH:MM:SS" string,
    # then look it up in the reverse of HOURLY_COLUMNS_MAPPING. The original used the raw
    # input string as the lookup key, which failed for full "YYYY-MM-DD HH:MM:SS" strings.
    if isinstance(datetime_obj_or_str, str):
        try:
            datetime_obj = datetime.strptime(datetime_obj_or_str, "%Y-%m-%d %H:%M:%S")
        except ValueError:
            try:
                datetime_obj = datetime.strptime(datetime_obj_or_str, "%H:%M:%S")
            except ValueError:
                return "Invalid datetime string format"
        time24 = datetime_obj.strftime("%H:%M:%S")
    elif isinstance(datetime_obj_or_str, time):
        time24 = datetime_obj_or_str.strftime("%H:%M:%S")
    else:
        time24 = datetime_obj_or_str.strftime("%H:%M:%S")
    reverse_mapping = {v: k for k, v in HOURLY_COLUMNS_MAPPING.items()}
    return reverse_mapping.get(time24, "Invalid time")
def encode_image_to_base64(image_path):
if os.path.exists(image_path):
with Image.open(image_path) as image:
output_buffer = BytesIO()
image.save(output_buffer, format='JPEG')
byte_data = output_buffer.getvalue()
base64_str = base64.b64encode(byte_data).decode('utf-8')
return base64_str
else:
DEBUG(f"Error: File does not exist at {image_path}")
def resize_and_convert_image(image_path, max_size=2160, quality=80):
with Image.open(image_path) as img:
# Resize image
ratio = max_size / max(img.size)
new_size = tuple([int(x * ratio) for x in img.size])
img = img.resize(new_size, Image.Resampling.LANCZOS)
# Convert to jpg
img_byte_arr = io.BytesIO()
img.save(img_byte_arr, format='JPEG', quality=quality)
img_byte_arr = img_byte_arr.getvalue()
return img_byte_arr

247
tests/__init__2.py Normal file
View file

@ -0,0 +1,247 @@
import os
from pathlib import Path
import ipaddress
from datetime import datetime, timedelta
from dotenv import load_dotenv
from pathlib import Path
from O365 import Account, FileSystemTokenBackend
import logging
from logging.handlers import RotatingFileHandler
from colorama import Fore, Style, init
init(autoreset=True)
class ColorFormatter(logging.Formatter):
"""Custom formatter to add colors to log levels."""
COLOR_MAP = {
logging.DEBUG: Fore.GREEN,
logging.INFO: Fore.LIGHTBLUE_EX,
logging.WARNING: Fore.YELLOW,
logging.ERROR: Fore.RED,
logging.CRITICAL: Fore.MAGENTA,
}
def format(self, record):
color = self.COLOR_MAP.get(record.levelno, Fore.WHITE)
record.levelname = f"{color}{record.levelname}{Style.RESET_ALL}"
return super().format(record)
def setup_logger():
"""Function to setup a logger; can have multiple handlers"""
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
color_formatter = ColorFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # NB: LOGS_DIR is defined below this call site, so constructing the file handler here
    # raised NameError at import time; it is disabled along with the addHandler call below.
    # handler = RotatingFileHandler(f'{LOGS_DIR}/app.log', maxBytes=2000000, backupCount=10)
    # handler.setFormatter(formatter)
console_handler = logging.StreamHandler()
console_handler.setFormatter(color_formatter)
logger = logging.getLogger('LOGGER')
logger.setLevel(logging.INFO)
# logger.addHandler(handler)
logger.addHandler(console_handler)
return logger
LOGGER = setup_logger()
### Initial initialization
BASE_DIR = Path(__file__).resolve().parent
CONFIG_DIR = BASE_DIR / "config"
ENV_PATH = CONFIG_DIR / ".env"
load_dotenv(ENV_PATH)
### API essentials
ROUTERS = os.getenv('ROUTERS', '').split(',')
PUBLIC_SERVICES = os.getenv('PUBLIC_SERVICES', '').split(',')
GLOBAL_API_KEY = os.getenv("GLOBAL_API_KEY")
HOST_NET = os.getenv("HOST_NET", "127.0.0.1")
HOST_PORT = int(os.getenv("HOST_PORT", 4444))
HOST = f"{HOST_NET}:{HOST_PORT}"
LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
TRUSTED_SUBNETS = [ipaddress.ip_network(subnet.strip()) for subnet in os.getenv('TRUSTED_SUBNETS', '127.0.0.1/32').split(',')]
### Directories & general paths
HOME_DIR = Path.home()
ROUTER_DIR = BASE_DIR / "routers"
DATA_DIR = BASE_DIR / "data"
os.makedirs(DATA_DIR, exist_ok=True)
ALERTS_DIR = DATA_DIR / "alerts"
os.makedirs(ALERTS_DIR, exist_ok=True)
LOGS_DIR = BASE_DIR / "logs"
os.makedirs(LOGS_DIR, exist_ok=True)
REQUESTS_DIR = LOGS_DIR / "requests"
os.makedirs(REQUESTS_DIR, exist_ok=True)
REQUESTS_LOG_PATH = LOGS_DIR / "requests.log"
### Docs & images
DOC_DIR = DATA_DIR / "docs"
os.makedirs(DOC_DIR, exist_ok=True)
SD_IMAGE_DIR = DATA_DIR / "sd" / "images"
os.makedirs(SD_IMAGE_DIR, exist_ok=True)
### Obsidian & notes
OBSIDIAN_VAULT_DIR = Path(os.getenv("OBSIDIAN_BASE_DIR") or HOME_DIR / "Nextcloud" / "notes")
OBSIDIAN_JOURNAL_DIR = OBSIDIAN_VAULT_DIR / "journal"
OBSIDIAN_RESOURCES_DIR = "obsidian/resources"
OBSIDIAN_BANNER_DIR = OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR / "banners"
os.makedirs(Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR, exist_ok=True)
### Database
DB = os.getenv("DB", 'sijdb')
DB_HOST = os.getenv("DB_HOST", "127.0.0.1")
DB_PORT = os.getenv("DB_PORT", 5432)
DB_USER = os.getenv("DB_USER", 'sij')
DB_PASS = os.getenv("DB_PASS")
DB_SSH = os.getenv("DB_SSH", "100.64.64.15")
DB_SSH_USER = os.getenv("DB_SSH_USER")
DB_SSH_PASS = os.getenv("DB_SSH_ENV")
DB_URL = f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB}'
### Large language model
LLM_URL = os.getenv("LLM_URL", "http://localhost:11434")
LLM_SYS_MSG = os.getenv("SYSTEM_MSG", "You are a helpful AI assistant.")
SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the summary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
DEFAULT_LLM = os.getenv("DEFAULT_LLM", "dolphin-mistral")
DEFAULT_VISION = os.getenv("DEFAULT_VISION", "llava")
DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", 'joanne')
### Stable diffusion
SD_WORKFLOWS_DIR = DATA_DIR / "sd" / "workflows"
os.makedirs(SD_WORKFLOWS_DIR, exist_ok=True)
COMFYUI_URL = os.getenv('COMFYUI_URL', "http://localhost:8188")
COMFYUI_DIR = Path(os.getenv('COMFYUI_DIR'))
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output'
### Summarization
SUMMARY_CHUNK_SIZE = int(os.getenv("SUMMARY_CHUNK_SIZE", 4000)) # measured in tokens
SUMMARY_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", 100)) # measured in tokens
SUMMARY_TPW = float(os.getenv("SUMMARY_TPW", 1.3)) # measured in tokens
SUMMARY_LENGTH_RATIO = int(os.getenv("SUMMARY_LENGTH_RATIO", 4)) # measured as original to length ratio
SUMMARY_MIN_LENGTH = int(os.getenv("SUMMARY_MIN_LENGTH", 150)) # measured in tokens
SUMMARY_INSTRUCT = os.getenv("SUMMARY_INSTRUCT", "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
SUMMARY_MODEL = os.getenv("SUMMARY_MODEL", "llama3")
SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 4096))
### ASR & TTS
ASR_DIR = DATA_DIR / "asr"
os.makedirs(ASR_DIR, exist_ok=True)
WHISPER_CPP_MODELS = os.getenv('WHISPER_CPP_MODELS', 'small,base,base-en,tiny,medium,medium-en,large,large-v2,large-v3').split(',')
TTS_DIR = DATA_DIR / "tts"
os.makedirs(TTS_DIR, exist_ok=True)
VOICE_DIR = TTS_DIR / 'voices'
os.makedirs(VOICE_DIR, exist_ok=True)
PODCAST_DIR = TTS_DIR / "sideloads"
os.makedirs(PODCAST_DIR, exist_ok=True)
TTS_OUTPUT_DIR = TTS_DIR / 'outputs'
os.makedirs(TTS_OUTPUT_DIR, exist_ok=True)
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
HOME_ZIP = os.getenv("HOME_ZIP")
LOCATION_OVERRIDES = DATA_DIR / "loc_overrides.json"
LOCATIONS_CSV = DATA_DIR / "US.csv"
# DB = DATA_DIR / "weatherlocate.db" # deprecated
VISUALCROSSING_API_KEY = os.getenv("VISUALCROSSING_API_KEY")
### Calendar & email account
MS365_TOGGLE = True if os.getenv("MS365_TOGGLE", "False") == "True" else False
ICAL_TOGGLE = True if os.getenv("ICAL_TOGGLE", "False") == "True" else False
ICS_PATH = DATA_DIR / 'calendar.ics' # deprecated now, but maybe revive?
ICALENDARS = os.getenv('ICALENDARS', 'NULL,VOID').split(',')
IMAP_HOST = os.getenv('IMAP_HOST', '127.0.0.1')
EMAIL_ADDRESS = os.getenv('EMAIL_ADDRESS')
EMAIL_PASS = os.getenv('EMAIL_PASS')
IMAP_PORT = int(os.getenv('IMAP_PORT', 1143))
IMAP_ENCRYPTION = os.getenv('IMAP_ENCRYPTION', 'STARTTLS')
SMTP_PORT = int(os.getenv('SMTP_PORT', 1025))
SMTP_ENCRYPTION = os.getenv('SMTP_ENCRYPTION', 'SSL')
PUBLIC_KEY = os.getenv('PUBLIC_KEY')
### Courtlistener & other webhooks
COURTLISTENER_DOCKETS_DIR = DATA_DIR / "courtlistener" / "dockets"
os.makedirs(COURTLISTENER_DOCKETS_DIR, exist_ok=True)
COURTLISTENER_SEARCH_DIR = DATA_DIR / "courtlistener" / "cases"
os.makedirs(COURTLISTENER_SEARCH_DIR, exist_ok=True)
CASETABLE_PATH = DATA_DIR / "courtlistener" / "cases.json"
COURTLISTENER_API_KEY = os.getenv("COURTLISTENER_API_KEY")
COURTLISTENER_BASE_URL = os.getenv("COURTLISTENER_BASE_URL", "https://www.courtlistener.com")
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
### Passwords & API keys
MAC_ID = os.getenv("MAC_ID")
MAC_UN = os.getenv("MAC_UN")
MAC_PW = os.getenv("MAC_PW")
TIMING_API_KEY = os.getenv("TIMING_API_KEY")
TIMING_API_URL = os.getenv("TIMING_API_URL", "https://web.timingapp.com/api/v1")
PHOTOPRISM_URL = os.getenv("PHOTOPRISM_URL")
PHOTOPRISM_USER = os.getenv("PHOTOPRISM_USER")
PHOTOPRISM_PASS = os.getenv("PHOTOPRISM_PASS")
### Tailscale
TS_IP = ipaddress.ip_address(os.getenv("TS_IP", "NULL"))
TS_SUBNET = ipaddress.ip_network(os.getenv("TS_SUBNET")) if os.getenv("TS_SUBNET") else None
TS_ID = os.getenv("TS_ID", "NULL")
TS_TAILNET = os.getenv("TS_TAILNET", "NULL")
TS_ADDRESS = f"http://{TS_ID}.{TS_TAILNET}.ts.net"
### Cloudflare
CF_TOKEN = os.getenv("CF_TOKEN")
CF_IP = DATA_DIR / "cf_ip.txt"
CF_DOMAINS_PATH = DATA_DIR / "cf_domains.json"
### Caddy
BASE_URL = os.getenv("BASE_URL")
CADDY_SERVER = os.getenv('CADDY_SERVER', None)
CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None
CADDY_API_KEY = os.getenv("CADDY_API_KEY")
### Maintenance
GARBAGE_COLLECTION_INTERVAL = 60 * 60 # Run cleanup every hour
GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours
### Microsoft Graph
MS365_CLIENT_ID = os.getenv('MS365_CLIENT_ID')
MS365_SECRET = os.getenv('MS365_SECRET')
MS365_TENANT_ID = os.getenv('MS365_TENANT_ID')
MS365_CERT_PATH = CONFIG_DIR / 'MS365' / '.cert.pem' # deprecated
MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated
MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt'
MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT')
MS365_LOGIN_URL = os.getenv('MS365_LOGIN_URL', 'https://login.microsoftonline.com')
MS365_AUTHORITY_URL = f'{MS365_LOGIN_URL}/{MS365_TENANT_ID}'
MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect")
MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access'.split(',')) #["Calendars.Read", "Calendars.ReadWrite", "offline_access"]
# ["https://graph.microsoft.com/.default"] # OAUTH_SCOPES = os.getenv('OAUTH_SCOPES', 'basic,calendar').split(',')
if MS365_KEY_PATH.exists():
    with open(MS365_KEY_PATH, 'r') as private_key_file:
        MS365_KEY = private_key_file.read()
else:
    MS365_KEY = None  # the cert/key paths above are deprecated, so tolerate their absence
# MS365_TOKEN_BACKEND = FileSystemTokenBackend(token_path=MS365_TOKEN_PATH)
# MS365_CREDENTIALS = (MS365_CLIENT_ID, MS365_SECRET)
# MS365_ACCOUNT = Account(
# credentials=MS365_CREDENTIALS,
# token_backend=MS365_TOKEN_BACKEND,
# tenant_id=MS365_TENANT_ID,
# scopes=MS365_SCOPE,
# auth_flow_type='authorization'
# )
LOGGER.critical(f"Visit https://api.sij.ai/o365/login")

189
tests/asr_faster.py Normal file
View file

@ -0,0 +1,189 @@
from fastapi import APIRouter, HTTPException, UploadFile, Form, status, Response
from fastapi.responses import FileResponse, JSONResponse
from starlette.concurrency import run_in_threadpool
from io import BytesIO
from typing import Optional
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import logging  # replaces the sijapi WHISPER_CPP_MODELS import, which was shadowed by the set defined below
import torchaudio
import ffmpeg
from faster_whisper import WhisperModel
### INITIALIZATIONS ###
asr = APIRouter(tags=["trusted", "private"])
WHISPER_CPP_MODELS = {"small", "base", "base-en", "tiny", "medium", "medium-en", "large", "large-v2", "large-v3"}
async def faster_whisper_mps(audio_bytes: bytes, subtitle: bool = False, model: str = "small", language: str = None, task: str = "transcribe") -> str:
model_name = f"openai/whisper-{model}"
logger.debug(f"Using model: {model_name} with language setting: {language}")
# Load the Whisper model and processor
model = WhisperForConditionalGeneration.from_pretrained(model_name, low_cpu_mem_usage=True)
processor = WhisperProcessor.from_pretrained(model_name)
# Decode audio using torchaudio
audio_io = BytesIO()
try:
audio_output = ffmpeg.input('pipe:0').output('pipe:1', format='wav').run(input=audio_bytes, capture_stdout=True, capture_stderr=True)
audio_io.write(audio_output[0])
audio_io.seek(0)
except Exception as e: # This will catch any type of exception
print(f"Caught an error: {e}")
# Now let's load it using torchaudio
    waveform, sampling_rate = torchaudio.load(audio_io)
    waveform = waveform.mean(dim=0, keepdim=True)  # Convert to mono
    # Whisper models expect 16 kHz input, so resample if the source differs
    if sampling_rate != 16000:
        waveform = torchaudio.functional.resample(waveform, sampling_rate, 16000)
        sampling_rate = 16000
    # Process the audio to get input features for the model
    inputs = processor(waveform.squeeze().numpy(), sampling_rate=sampling_rate, return_tensors="pt", padding=True)
    # Generate transcription with the model; the processor stores features under "input_features",
    # not "input_tensors". return_segments assumes a transformers version with long-form Whisper support.
    with torch.no_grad():
        outputs = model.generate(inputs["input_features"], return_segments=True)
    # With return_segments=True, generate returns a dict with "sequences" and "segments"
    transcription = processor.batch_decode(outputs["sequences"], skip_special_tokens=True)[0]
if subtitle:
        segments = outputs["segments"][0]  # segments are returned per batch item (assumption about the dict layout)
output_text = ""
sequence_number = 1
for segment in segments:
start_time = faster_whisper_seconds_to_srt_time(segment["start"])
end_time = faster_whisper_seconds_to_srt_time(segment["end"])
text = segment["text"].strip().replace("\n", " ").replace("\r", "") # Clean up text
output_text += f"{sequence_number}\n{start_time} --> {end_time}\n{text}\n\n"
sequence_number += 1
return output_text.strip().replace("\r", "") # Remove any remaining carriage return characters
else:
return transcription.strip()
def faster_whisper_transcribe(whisper, audio_io, language, task):
# Transcribe the audio and get segments
segments, info = whisper.transcribe(audio=audio_io, language=language, task=task, beam_size=5)
return segments, info
async def faster_whisper_cpu(audio_io: BytesIO, subtitle: bool = False, model: str = "small", language: str = None, task: str = "transcribe") -> str:
logger.debug(f"Using model: {model} with language setting: {language}")
whisper = WhisperModel(model, device="cpu", compute_type="int8")
# Run the blocking transcribe method in a thread pool
segments, info = await run_in_threadpool(faster_whisper_transcribe, whisper, audio_io, language, task)
output_text = ""
logger.debug(f"Detected language {info.language} with probability {info.language_probability}%")
if subtitle:
sequence_number = 1
for segment in segments:
start_time = faster_whisper_seconds_to_srt_time(segment.start)
end_time = faster_whisper_seconds_to_srt_time(segment.end)
text = segment.text.strip().replace("\n", " ").replace("\r", "") # Clean up text
output_text += f"{sequence_number}\n{start_time} --> {end_time}\n{text}\n\n"
sequence_number += 1
else:
for segment in segments:
output_text += f"{segment.text.strip()} "
if subtitle:
return output_text.strip().replace("\r", "") # Remove any remaining carriage return characters
else:
return output_text.strip() # Remove any remaining carriage return characters
def faster_whisper_seconds_to_srt_time(seconds: float) -> str:
"""Convert seconds to SRT time format HH:MM:SS,mmm."""
hours, remainder = divmod(seconds, 3600)
minutes, seconds = divmod(remainder, 60)
milliseconds = (seconds - int(seconds)) * 1000
return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(milliseconds):03d}"
@asr.post("/asr")
@asr.post("/asr/whisper")
@asr.post("/v1/audio/transcriptions", response_class=FileResponse)
async def faster_whisper_mps_endpoint(
file: Optional[UploadFile] = Form(None),
audio_file: Optional[UploadFile] = Form(None),
subtitle: bool = Form(False),
model: str = Form(None),
output: str = "text",
task: str = "transcribe",
language: str = None,
word_timestamps: bool = False,
encode: bool = True
) -> FileResponse:
if not file and not audio_file:
raise HTTPException(status_code=400, detail="Either 'file' or 'audio_file' must be provided.")
used_file = file or audio_file
model = model if model in WHISPER_CPP_MODELS else "small"
logger.debug(f"Received request with model: {model} and language: {language}")
content = await used_file.read()
output_content = await faster_whisper_mps(content, subtitle, model, language, task) # Pass language to whisper_mps
# Save the output content to a file if subtitle is True
output_filename = "output.srt"
if subtitle:
with open(output_filename, "w") as f:
f.write(output_content)
return FileResponse(output_filename, media_type='application/x-subrip', filename=output_filename)
else:
logger.debug(output_content)
return JSONResponse(content={"text": output_content})
@asr.options("/asr")
@asr.options("/asr/whisper")
@asr.options("/v1/audio/transcriptions", tags=["transcriptions"])
async def faster_whisper_options_for_mps():
headers = {
"Access-Control-Allow-Origin": "*", # Specify domains or use '*' for all
"Access-Control-Allow-Methods": "POST, OPTIONS",
"Access-Control-Allow-Headers": "Content-Type, Authorization",
"Access-Control-Max-Age": "86400", # 24 hours
}
return Response(status_code=status.HTTP_204_NO_CONTENT, headers=headers)
@asr.post("/asr2")
@asr.post("/asr/whisper2")
@asr.post("/v1/audio/transcriptions2", response_class=FileResponse)
async def faster_whisper_endpoint2(
file: Optional[UploadFile] = Form(None),
audio_file: Optional[UploadFile] = Form(None),
subtitle: bool = Form(False),
model: str = Form(None),
output: str = "text",
task: str = "transcribe",
language: str = None,
word_timestamps: bool = False,
encode: bool = True
) -> Response:
if not file and not audio_file:
raise HTTPException(status_code=400, detail="Either 'file' or 'audio_file' must be provided.")
used_file = file or audio_file
model = model if model in WHISPER_CPP_MODELS else "small"
logger.debug(f"Received request with model: {model} and language: {language}")
content = await used_file.read()
audio_io = BytesIO(content)
output_content = await faster_whisper_cpu(audio_io, subtitle, model, language, task) # Pass language to faster_whisper_cpu
# Save the output content to a file if subtitle is True
output_filename = "output.srt"
if subtitle:
with open(output_filename, "w") as f:
f.write(output_content)
return FileResponse(output_filename, media_type='application/x-subrip', filename=output_filename)
else:
logger.debug(output_content)
return JSONResponse(content={"text": output_content})
@asr.options("/asr2")
@asr.options("/asr/whisper2")
@asr.options("/v1/audio/transcriptions2", tags=["transcriptions"])
async def faster_whisper_options_2():
headers = {
"Access-Control-Allow-Origin": "*", # Specify domains or use '*' for all
"Access-Control-Allow-Methods": "POST, OPTIONS",
"Access-Control-Allow-Headers": "Content-Type, Authorization",
"Access-Control-Max-Age": "86400", # 24 hours
}
return Response(status_code=status.HTTP_204_NO_CONTENT, headers=headers)
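# Example requests against the endpoints above (host and port are hypothetical;
# adjust to your deployment). /asr is backed by faster_whisper_mps, /asr2 by
# faster_whisper_cpu; both return SRT when subtitle=true, otherwise JSON:
#   curl -X POST http://localhost:8000/asr -F "file=@sample.wav" -F "subtitle=true" -F "model=small"
#     -> output.srt served as application/x-subrip
#   curl -X POST http://localhost:8000/asr2 -F "file=@sample.wav"
#     -> {"text": "..."}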

102
tests/asr_lightning.py Normal file

@@ -0,0 +1,102 @@
import logging
import os
import soundfile as sf
from fastapi import APIRouter, HTTPException, UploadFile, Form, status, Response
from fastapi.responses import FileResponse, JSONResponse
from ffmpeg import input as ffmpeg_input
from typing import Optional
from whisperplus.pipelines.lightning_whisper_mlx import LightningWhisperMLX
logger = logging.getLogger(__name__)  # `logger` was used below but never defined; stdlib logging is assumed here
## INITIALIZATIONS ##
asr = APIRouter(tags=["trusted", "private"])
WHISPER_CPP_MODELS = {"small", "base", "base-en", "tiny", "medium", "medium-en", "large", "large-v2", "large-v3"}
async def lightningmlx_whisper(audio_bytes: bytes, subtitle: bool = False, model: str = "distil-large-v3", language: str = None, task: str = "transcribe") -> str:
whisper = LightningWhisperMLX(model=model, batch_size=12, quant=None)
# Convert audio file to .wav format
input_file = 'input_file'
output_file = 'output_file.wav'
with open(input_file, 'wb') as f:
f.write(audio_bytes)
ffmpeg_input(input_file).output(output_file).run(overwrite_output=True)  # overwrite any stale temp file instead of prompting
# Load the wav file
audio, samplerate = sf.read(output_file)
output = whisper.transcribe(audio, language)["text"]
# Clean up temporary files
os.remove(input_file)
os.remove(output_file)
if subtitle:
segments = whisper.get_segments(audio, language)
output_text = ""
sequence_number = 1
for segment in segments:
start_time = lightningmlx_seconds_to_srt_time(segment["start"])
end_time = lightningmlx_seconds_to_srt_time(segment["end"])
text = segment["text"].strip().replace("\n", " ").replace("\r", "") # Clean up text
output_text += f"{sequence_number}\n{start_time} --> {end_time}\n{text}\n\n"
sequence_number += 1
return output_text.strip().replace("\r", "") # Remove any remaining carriage return characters
else:
return output.strip()
def lightningmlx_seconds_to_srt_time(seconds: float) -> str:
"""Convert seconds to SRT time format HH:MM:SS,mmm."""
hours, remainder = divmod(seconds, 3600)
minutes, seconds = divmod(remainder, 60)
milliseconds = (seconds - int(seconds)) * 1000
return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(milliseconds):03d}"
@asr.post("/asr")
@asr.post("/asr/whisper")
@asr.post("/v1/audio/transcriptions", response_class=FileResponse)
async def lightningmlx_whisper_endpoint(
file: Optional[UploadFile] = Form(None),
audio_file: Optional[UploadFile] = Form(None),
subtitle: bool = Form(False),
model: str = Form(None),
output: str = "text",
task: str = "transcribe",
language: str = None,
word_timestamps: bool = False,
encode: bool = True
) -> Response:
if not file and not audio_file:
raise HTTPException(status_code=400, detail="Either 'file' or 'audio_file' must be provided.")
used_file = file or audio_file
model = model if model in WHISPER_CPP_MODELS else "distil-large-v3"
logger.debug(f"Received request with model: {model} and language: {language}")
content = await used_file.read()
output_content = await lightningmlx_whisper(content, subtitle, model, language, task) # Pass language to whisperplus_mlx
# Save the output content to a file if subtitle is True
output_filename = "output.srt"
if subtitle:
with open(output_filename, "w") as f:
f.write(output_content)
return FileResponse(output_filename, media_type='application/x-subrip', filename=output_filename)
else:
logger.debug(output_content)
return JSONResponse(content={"text": output_content})
@asr.options("/asr")
@asr.options("/asr/whisper")
@asr.options("/v1/audio/transcriptions", tags=["transcriptions"])
async def lightningmlx_whisper_options():
headers = {
"Access-Control-Allow-Origin": "*", # Specify domains or use '*' for all
"Access-Control-Allow-Methods": "POST, OPTIONS",
"Access-Control-Allow-Headers": "Content-Type, Authorization",
"Access-Control-Max-Age": "86400", # 24 hours
}
return Response(status_code=status.HTTP_204_NO_CONTENT, headers=headers)

117
tests/asr_wp.py Normal file

@@ -0,0 +1,117 @@
from fastapi import APIRouter, UploadFile, Form, HTTPException, File
from fastapi.responses import JSONResponse, FileResponse
from pydantic import BaseModel, HttpUrl
from whisperplus.pipelines import mlx_whisper
from youtube_dl import YoutubeDL
import subprocess
import os
import uuid
import shutil
import time
from typing import Optional
from threading import Thread
from sijapi import ASR_DIR, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL
asr = APIRouter(tags=["trusted", "private"])
WHISPER_CPP_MODELS = {"small", "base", "base-en", "tiny", "medium", "medium-en", "large", "large-v2", "large-v3", "mlx-community/whisper-large-v3-mlx"}
class TranscribedText(BaseModel):
text: str
def wp_convert_to_mp3(file_path: str):
mp3_file_path = os.path.join(ASR_DIR, f"{uuid.uuid4()}.mp3")
subprocess.run(["ffmpeg", "-y", "-i", file_path, mp3_file_path], check=True)
return mp3_file_path
def wp_download_from_youtube(url: str):
temp_file = os.path.join(ASR_DIR, f"{uuid.uuid4()}.mp3")
ytdl_opts = {
'outtmpl': temp_file,
'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '192'}],
'nooverwrites': True
}
with YoutubeDL(ytdl_opts) as ydl:
ydl.download([url])
return temp_file
def wp_format_srt_timestamp(seconds: float):
milliseconds = round(seconds * 1000.0)
hours = milliseconds // 3_600_000
milliseconds -= hours * 3_600_000
minutes = milliseconds // 60_000
milliseconds -= minutes * 60_000
seconds = milliseconds // 1_000
milliseconds -= seconds * 1_000
return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
def wp_write_srt(segments: list, output_file: str):
with open(output_file, 'w') as f:
for i, segment in enumerate(segments, start=1):
start = wp_format_srt_timestamp(segment['start'])
end = wp_format_srt_timestamp(segment['end'])
text = segment['text']
f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
def wp_cleanup_files():
while True:
now = time.time()
for filename in os.listdir(ASR_DIR):
file_path = os.path.join(ASR_DIR, filename)
if os.path.isfile(file_path):
file_age = now - os.path.getmtime(file_path)
if file_age > GARBAGE_TTL:
os.remove(file_path)
time.sleep(GARBAGE_COLLECTION_INTERVAL)
@asr.post("/asr")
async def wp_asr_endpoint(
url: Optional[HttpUrl] = None,
file: Optional[UploadFile] = File(None),
output: str = Form('txt'),
language: Optional[str] = Form(None),
task: str = Form('transcribe'),
model: str = Form('large-v3')
):
response = await wp_do_asr(url, file, output, language, task, model)
if output == 'srt':
return FileResponse(response, media_type='application/x-subrip', filename='subtitle.srt')
elif output == 'json':
return JSONResponse(content=response)
else:
return response
async def wp_do_asr(url: str = None, file: Optional[UploadFile] = None, output: str = 'txt', language: str = 'en', task: str = 'transcribe', model: str = 'small'):
if url:
audio_path = wp_download_from_youtube(url)
elif file:
temp_file = os.path.join(ASR_DIR, f"{uuid.uuid4()}_{file.filename}")
with open(temp_file, 'wb') as out:
out.write(await file.read())
audio_path = wp_convert_to_mp3(temp_file)
else:
raise HTTPException(status_code=400, detail="Either URL or file must be provided")
transcribe_args = {
# Accept either a short model name or a full HF repo path (the allowed set above includes one)
'path_or_hf_repo': model if '/' in model else f"mlx-community/whisper-{model}-mlx",
'task': task
}
if language:
transcribe_args['language'] = language
result = mlx_whisper.transcribe(audio_path, **transcribe_args)
if output == 'srt':
srt_output_path = os.path.join(ASR_DIR, f"{uuid.uuid4()}.srt")
wp_write_srt(result['segments'], srt_output_path)
return srt_output_path
elif output == 'json':
return {"text": result.get("text"), "segments": result.get("segments")}
else:
return result.get("text")
# Start the cleanup thread
cleanup_thread = Thread(target=wp_cleanup_files, daemon=True)
cleanup_thread.start()
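# Example usage (hypothetical host and video id). Note `url` is declared without
# Form(), so FastAPI reads it from the query string:
#   curl -X POST "http://localhost:8000/asr?url=https://www.youtube.com/watch?v=<id>" \
#        -F "output=srt" -F "model=large-v3"
# or upload a local file for plain text:
#   curl -X POST http://localhost:8000/asr -F "file=@clip.m4a" -F "output=txt"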

416
tests/locate copy.py Normal file

@@ -0,0 +1,416 @@
import asyncpg
import folium
import httpx
import json
import math
import os
import pandas
import psycopg2
import pytz
import requests
import time as timers
from datetime import datetime, timedelta, time
from fastapi import APIRouter, HTTPException, Query, Response
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse, HTMLResponse, PlainTextResponse
from pathlib import Path
from psycopg2.extras import RealDictCursor
from pydantic import BaseModel
from typing import Optional, Any, Dict, List, Union
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, HOME_DIR, DATA_DIR, VISUALCROSSING_API_KEY, LOCATIONS_CSV, DB, BASE_URL, GLOBAL_API_KEY, TS_IP, DB_HOST, DB_PORT, DB_USER, DB_PASS, DB_SSH, DB_SSH_USER, DB_SSH_PASS
from sijapi.utilities import db_localized
from sijapi.routers.weather import get_weather
locate = APIRouter()
class Location(BaseModel):
latitude: Optional[float] = None
longitude: Optional[float] = None
zip: Optional[str] = None
street: Optional[str] = None
city: Optional[str] = None
state: Optional[str] = None
elevation: Optional[float] = None
date: Optional[datetime] = None
country: Optional[str] = None
distance: Optional[float] = None  # populated by geocode_location; not a stored column
def map_location_data(location_data):
return {
'id': location_data[0],
'datetime': location_data[1],
'latitude': location_data[2],
'longitude': location_data[3],
'city': location_data[4],
'state': location_data[5],
'zip': location_data[6],
'street': location_data[7],
'elevation': location_data[8]
}
async def geocode_location(zip_code: Optional[str] = None, latitude: Optional[float] = None, longitude: Optional[float] = None, city: Optional[str] = None, state: Optional[str] = None, country_code: str = 'US') -> Location:
try:
# Establish the database connection
conn = db_localized()
# Build the SQL query based on the provided parameters. asyncpg placeholders are
# positional, so number them from the current length of `params` rather than
# hard-coding $3..$7 (which breaks whenever lat/lon are omitted).
conditions = []
params = []
select_cols = "id, street, city, state, country, latitude, longitude, zip, elevation, datetime, date"
order_by = ""
if latitude is not None and longitude is not None:
params.extend([longitude, latitude])
point = "ST_SetSRID(ST_MakePoint($1, $2), 4326)"
select_cols += f", ST_Distance(geom, {point}) AS distance"
conditions.append(f"ST_DWithin(geom, {point}, 50000)")  # 50 km radius
order_by = " ORDER BY distance"
if zip_code:
params.extend([zip_code, country_code])
conditions.append(f"zip = ${len(params) - 1} AND country = ${len(params)}")
if city and state:
params.extend([city, state, country_code])
conditions.append(f"city ILIKE ${len(params) - 2} AND state ILIKE ${len(params) - 1} AND country = ${len(params)}")
query = f"SELECT {select_cols} FROM Locations"
if conditions:
query += " WHERE " + " OR ".join(conditions)
query += order_by + " LIMIT 1;"
DEBUG(f"Executing query: {query} with params: {params}")
# Execute the query with the provided parameters
result = await conn.fetchrow(query, *params)
# Close the connection
await conn.close()
if result:
location_info = Location(
latitude=result['latitude'],
longitude=result['longitude'],
zip=result['zip'],
street=result.get('street', ''),
city=result['city'],
state=result['state'],
country=result['country'],
elevation=result.get('elevation', 0),
distance=result.get('distance')
)
DEBUG(f"Found location: {location_info}")
return location_info
else:
DEBUG("No location found with provided parameters.")
return Location()
except Exception as e:
ERR(f"Error occurred: {e}")
raise Exception("An error occurred while processing your request")
def post_location(date: datetime, location):
pacific = pytz.timezone('America/Los_Angeles')
datetime_entry = pacific.localize(date).isoformat()
conn = db_localized()
cursor = conn.cursor()
# Prepare data to insert or update
data_to_insert = (
datetime_entry,
location.latitude,
location.longitude,
location.city,
location.state,
location.zip,
location.street,
location.elevation
)
# Check if an entry exists
cursor.execute('''
SELECT id FROM locations
WHERE datetime = ? AND latitude = ? AND longitude = ?
''', (datetime_entry, location.latitude, location.longitude))
existing_entry = cursor.fetchone()
if existing_entry:
# Update existing location
cursor.execute('''
UPDATE locations
SET city = ?, state = ?, zip = ?, street = ?, elevation = ?
WHERE id = ?
''', (location.city, location.state, location.zip, location.street, location.elevation, existing_entry[0]))
else:
# Insert new location into database
cursor.execute('''
INSERT INTO locations (datetime, latitude, longitude, city, state, zip, street, elevation)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
''', data_to_insert)
conn.commit()
conn.close()
return {
'datetime': datetime_entry,
'latitude': location.latitude,
'longitude': location.longitude,
'city': location.city,
'state': location.state,
'zip': location.zip,
'street': location.street,
'elevation': location.elevation
}
def fetch_locations(start: datetime, end: datetime) -> List[dict]:
start_date = start.strftime("%Y-%m-%d")
end_date = end.strftime("%Y-%m-%d")
conn = db_localized()
cursor = conn.cursor()
query = '''
SELECT * FROM locations
WHERE datetime BETWEEN ? AND ?
'''
cursor.execute(query, (start_date, end_date))
rows = cursor.fetchall()
conn.close()
locations = [dict(row) for row in rows]
return locations
@locate.get("/map/start_date={start_date_str}&end_date={end_date_str}", response_class=HTMLResponse)
async def generate_map_endpoint(start_date_str: str, end_date_str: str):
try:
start_date = datetime.fromisoformat(start_date_str)
end_date = datetime.fromisoformat(end_date_str)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid date format")
html_content = await generate_map(start_date, end_date)
return HTMLResponse(content=html_content)
@locate.get("/map", response_class=HTMLResponse)
async def generate_alltime_map_endpoint():
try:
start_date = datetime.fromisoformat("2023-01-01")
end_date = datetime.now()
except ValueError:
raise HTTPException(status_code=400, detail="Invalid date format")
html_content = await generate_map(start_date, end_date)
return HTMLResponse(content=html_content)
async def generate_map(start_date: datetime, end_date: datetime):
locations = fetch_locations(start_date, end_date)
if not locations:
raise HTTPException(status_code=404, detail="No locations found for the given date range")
# Create a folium map centered around the first location
map_center = [locations[0]['latitude'], locations[0]['longitude']]
m = folium.Map(location=map_center, zoom_start=5)
# Add markers for each location
for loc in locations:
folium.Marker(
location=[loc["latitude"], loc["longitude"]],
popup=f"{loc['city']}, {loc['state']}<br>Elevation: {loc['elevation']}m<br>DateTime: {loc['datetime']}",
tooltip=f"{loc['city']}, {loc['state']}"
).add_to(m)
# Save the map to an HTML file and return the HTML content
map_html = "map.html"
m.save(map_html)
with open(map_html, 'r') as file:
html_content = file.read()
return html_content
@locate.post("/locate")
async def post_locate(locations: Union[Location, List[Location]]):
pacific = pytz.timezone('America/Los_Angeles')
responses = []
if isinstance(locations, Location):
locations = [locations]
for location in locations:
if location.date:
# Pydantic has already coerced `date` to a datetime; a bare date parses to
# midnight, in which case default to 2 PM.
date_time = location.date
if date_time.hour == 0 and date_time.minute == 0:
date_time = date_time.replace(hour=14)
date_time_str = date_time.strftime("%Y-%m-%d %H:%M")
else:
date_time = datetime.now(pacific).replace(tzinfo=None)  # naive local time; post_location localizes it
date_time_str = date_time.strftime("%Y-%m-%d %H:%M")
location_entry = post_location(date_time, location)
# Assume fetching weather data is handled separately and similarly needs updating
weather_data = get_weather(location_entry['latitude'], location_entry['longitude'], date_time_str.split(' ')[0])
responses.append({"location_data": location_entry, "weather_data": weather_data})
return {"message": "Locations and weather updated", "results": responses}
@locate.get("/locate/{datetime_str}")
async def get_locate(datetime_str: str):
try:
# Parse string to datetime
date_time = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M")
location_data = get_location(date_time)
if not location_data:
raise HTTPException(status_code=404, detail="No nearby data found for this date and time")
return location_data
except ValueError:
raise HTTPException(status_code=400, detail="Invalid datetime format. It must be YYYY-MM-DD HH:MM")
def get_location(date: datetime) -> list:
pacific = pytz.timezone('America/Los_Angeles')
formatted_datetime = pacific.localize(date).isoformat()
conn = db_localized()
cursor = conn.cursor()
locations = []
# First, try to find an exact match
cursor.execute('''
SELECT id, datetime, latitude, longitude, city, state, zip, street, elevation FROM locations
WHERE datetime = ?
''', (formatted_datetime,))
location_data = cursor.fetchone()
if location_data:
locations.append(map_location_data(location_data))
# If no exact match found or only the date was provided, check for records on the same day
if len(locations) == 0 or '00:00' in formatted_datetime:
start_of_day = pacific.localize(datetime.combine(date.date(), datetime.min.time()))
end_of_day = start_of_day + timedelta(days=1)
cursor.execute('''
SELECT id, datetime, latitude, longitude, city, state, zip, street, elevation FROM locations
WHERE datetime >= ? AND datetime < ?
ORDER BY datetime DESC
''', (start_of_day.isoformat(), end_of_day.isoformat()))
day_locations = cursor.fetchall()
for loc in day_locations:
locations.append(map_location_data(loc))
# If no records on the same day, find the most recent before the given datetime
if not locations:
cursor.execute('''
SELECT id, datetime, latitude, longitude, city, state, zip, street, elevation FROM locations
WHERE datetime < ?
ORDER BY datetime DESC
LIMIT 1
''', (formatted_datetime,))
location_data = cursor.fetchone()
if location_data:
locations.append(map_location_data(location_data))
conn.close()
return locations if locations else []
def parse_custom_datetime(date_str, hour_str):
try:
datetime_str = f"{date_str} {hour_str}:00"
naive_datetime = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M")
pacific = pytz.timezone('America/Los_Angeles')
aware_datetime = pacific.localize(naive_datetime, is_dst=None)
return aware_datetime.isoformat()
except Exception as e:
raise ValueError(f"Error parsing datetime: {str(e)}")
def post_custom_location(date_time_str, location: Location):
conn = db_localized()
cursor = conn.cursor()
datetime_entry = date_time_str # Use the already formatted datetime string
data_to_insert = (
datetime_entry,
location.latitude,
location.longitude,
location.city,
location.state,
location.zip,
location.street,
location.elevation
)
cursor.execute('''
SELECT id FROM locations
WHERE datetime = ? AND latitude = ? AND longitude = ?
''', (datetime_entry, location.latitude, location.longitude))
existing_entry = cursor.fetchone()
if existing_entry:
cursor.execute('''
UPDATE locations
SET city = ?, state = ?, zip = ?, street = ?, elevation = ?
WHERE id = ?
''', (location.city, location.state, location.zip, location.street, location.elevation, existing_entry[0]))
else:
cursor.execute('''
INSERT INTO locations (datetime, latitude, longitude, city, state, zip, street, elevation)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
''', data_to_insert)
conn.commit()
conn.close()
return {
'datetime': datetime_entry,
'latitude': location.latitude,
'longitude': location.longitude,
'city': location.city,
'state': location.state,
'zip': location.zip,
'street': location.street,
'elevation': location.elevation
}
@locate.post("/upload_tracker")
async def upload_custom_locations(data: Dict[str, Dict[str, Any]]):
results = []
for date, hours in data.items():
for hour, location in hours.items():
try:
datetime_str = parse_custom_datetime(date, hour)
location_input = Location(**location)
location_entry = post_custom_location(datetime_str, location_input)
results.append(location_entry)
except Exception as e:
raise HTTPException(status_code=400, detail=f"Error processing location for {date} {hour}: {str(e)}")
return {"message": "Locations uploaded successfully", "results": results}

27
tests/newstest.py Normal file

@@ -0,0 +1,27 @@
import newspaper
import trafilatura
aggregator = []
source = newspaper.build('https://wired.com', language='en')  # renamed from `nyt`: the source built here is Wired
# A bare `newspaper.NewsPool` reference stood here; as written it was a no-op, so it
# is left out (NewsPool must be instantiated and driven via .set()/.join() to be useful)
if len(source.articles) > 0:
for article in source.articles:
try:
downloaded = trafilatura.fetch_url(article.url)
# handle the case where no meaningful content was extracted
if downloaded is None:
print("Nothing downloaded.")
continue
result = trafilatura.extract(downloaded, include_comments=False)
if result is not None:
aggregator.append(result)
print(f"Article: {result}")
else:
print("No result.")
except Exception as e:
print(f'Failed to process article {article.url} with error {str(e)}')
# Then you can do something with the aggregator list here.
else:
print("No articles obtained!")

297
tests/weather copy.py Normal file

@@ -0,0 +1,297 @@
import asyncpg
import folium
import httpx
import json
import math
import os
import pandas
import psycopg2
import pytz
import requests
import sqlite3  # needed below: the get_*_weather_data helpers use sqlite3.connect/Row
import time as timers
from datetime import datetime, timedelta, time
from fastapi import APIRouter, HTTPException, Query, Response
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse, HTMLResponse, PlainTextResponse
from pathlib import Path
from psycopg2.extras import RealDictCursor
from pydantic import BaseModel
from sshtunnel import SSHTunnelForwarder
from typing import Optional, Any, Dict, List, Union
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOME_DIR, DATA_DIR, VISUALCROSSING_BASE_URL, VISUALCROSSING_API_KEY, LOCATIONS_CSV, DB, BASE_URL, GLOBAL_API_KEY, TS_IP
from sijapi.utilities import get_db_connection, db_localized, haversine
weather = APIRouter()
def get_stored_weather(conn, date, hour, zip_code):
cursor = conn.cursor()
hour = str(int(hour)).zfill(2) # Ensure hour is zero-padded
query = '''
SELECT H.*, HW.*, DW.*
FROM Hours H
JOIN Days D ON H.day_id = D.id
JOIN Locations L ON H.location_id = L.id
LEFT JOIN HourlyWeather HW ON H.hourly_weather_id = HW.id
LEFT JOIN DailyWeather DW ON DW.day_id = D.id
WHERE D.date = ? AND H.hour = ? AND L.zip = ?
'''
parameters = (date, hour, zip_code)
DEBUG(f"Querying stored weather data with: {query} and parameters: {parameters}")
cursor.execute(query, parameters)
result = cursor.fetchone()
if result:
DEBUG(f"Stored weather found for {date} {hour}:00 at zip {zip_code}")
return dict(result)
DEBUG(f"No stored weather found for {date} {hour}:00 at zip {zip_code}")
return None
def get_weather(latitude: float, longitude: float, date: datetime = None):
date = date if date else datetime.now()
request_date_str = date.strftime("%Y-%m-%d")
DEBUG(f"Called get_weather with lat: {latitude}, lon: {longitude}, datetime: {request_date_str}")
daily_weather_data = get_weather_data(latitude, longitude, date)
DEBUG(f"daily_weather_data: {daily_weather_data}")
date_now = datetime.now().date()
date_requested = date.date()
fetch_new_data = False
if daily_weather_data:
last_updated = daily_weather_data.get('last_updated')
if last_updated:
date_updated = datetime.strptime(last_updated, "%Y-%m-%d %H:%M:%S")
DEBUG(f"last updated {date_updated}")
else:
date_updated = None
ERR("Stored weather data has no last_updated timestamp")
DEBUG(f"LAT: {latitude}\nLON: {longitude}\nDATA LAT: {daily_weather_data.get('latitude')}\nDATA LON: {daily_weather_data.get('longitude')}\n")
distance = haversine(latitude, longitude, daily_weather_data.get('latitude'), daily_weather_data.get('longitude'))
DEBUG(f"haversine distance: {distance}")
# Refresh when: the timestamp is missing, the request is for today or later,
# the stored data predates the requested date, or it was fetched > 8 km away.
if (date_updated is None or
date_requested >= date_now or
date_updated.date() < date_requested or
distance > 8):
DEBUG("we need new data!")
fetch_new_data = True
else:
fetch_new_data = True
if fetch_new_data:
url = f"{VISUALCROSSING_BASE_URL}/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
try:
response = requests.get(url)
if response.status_code == 200:
DEBUG(f"Successfully obtained data from VC...")
try:
weather_data = response.json()
DEBUG(f"Calling store_weather_data with {weather_data}")
store_weather_data(weather_data, date)
DEBUG(f"New weather data for {request_date_str} stored in database...")
daily_weather_data = get_weather_data(latitude, longitude, date)
if daily_weather_data is not None:
DEBUG(f"Weather data retrieved back from database {daily_weather_data}")
return daily_weather_data
else:
raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
except Exception as e:
ERR(f"Problem parsing VC response or storing data: {e}")
raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
else:
ERR(f"Failed to fetch weather data: {response.status_code}, {response.text}")
raise HTTPException(status_code=response.status_code, detail="Failed to fetch weather data")
except Exception as e:
ERR(f"Exception during API call: {e}")
raise HTTPException(status_code=500, detail="Failed to fetch weather data")
return daily_weather_data
def store_weather_data(weather_data, date: datetime):
# DEBUG(f"Reached store_weather_data. weather_data = {weather_data}")
request_date_str = date.strftime("%Y-%m-%d")
conn = db_localized()
cursor = conn.cursor()
try:
day_data = weather_data.get('days')[0]
# DEBUG(f"day_data: {day_data}")
# Convert 'preciptype' and 'stations' to CSV string if they are lists
preciptype_csv = ','.join(day_data['preciptype']) if isinstance(day_data.get('preciptype'), list) else ""
stations_csv = ','.join(day_data['stations']) if isinstance(day_data.get('stations'), list) else ""
daily_weather_params = (
day_data.get('sunrise'), day_data.get('sunriseEpoch'),
day_data.get('sunset'), day_data.get('sunsetEpoch'),
day_data.get('description'), day_data.get('tempmax'),
day_data.get('tempmin'), day_data.get('uvindex'),
day_data.get('winddir'), day_data.get('windspeed'),
day_data.get('icon'), datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
day_data.get('datetime'), day_data.get('datetimeEpoch'),
day_data.get('temp'), day_data.get('feelslikemax'),
day_data.get('feelslikemin'), day_data.get('feelslike'),
day_data.get('dew'), day_data.get('humidity'),
day_data.get('precip'), day_data.get('precipprob'),
day_data.get('precipcover'), preciptype_csv,
day_data.get('snow'), day_data.get('snowdepth'),
day_data.get('windgust'), day_data.get('pressure'),
day_data.get('cloudcover'), day_data.get('visibility'),
day_data.get('solarradiation'), day_data.get('solarenergy'),
day_data.get('severerisk', 0), day_data.get('moonphase'),
day_data.get('conditions'), stations_csv, day_data.get('source')
)
daily_weather_query = '''
INSERT INTO DailyWeather (
sunrise, sunriseEpoch, sunset, sunsetEpoch, description,
tempmax, tempmin, uvindex, winddir, windspeed, icon, last_updated,
datetime, datetimeEpoch, temp, feelslikemax, feelslikemin, feelslike,
dew, humidity, precip, precipprob, precipcover, preciptype,
snow, snowdepth, windgust, pressure, cloudcover, visibility,
solarradiation, solarenergy, severerisk, moonphase, conditions,
stations, source
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
'''
cursor.execute(daily_weather_query, daily_weather_params)
daily_weather_id = cursor.lastrowid
cursor.execute('''
INSERT INTO Days (date, daily_weather_id) VALUES (?, ?)
ON CONFLICT(date) DO UPDATE SET daily_weather_id = excluded.daily_weather_id
''', (request_date_str, daily_weather_id))
if 'hours' in day_data:
for hour_data in day_data['hours']:
station_hours = ','.join(day_data['stations']) if isinstance(day_data.get('stations'), list) else ""
cursor.execute('''
INSERT INTO HourlyWeather (day_id, datetime, datetimeEpoch, temp, feelslike, humidity, dew, precip, precipprob,
snow, snowdepth, windgust, windspeed, winddir, pressure, cloudcover, visibility, solarradiation, solarenergy,
uvindex, severerisk, conditions, icon, stations, source)
VALUES ((SELECT id FROM Days WHERE date = ?), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
request_date_str, hour_data['datetime'], hour_data.get('datetimeEpoch'), hour_data['temp'], hour_data['feelslike'],
hour_data['humidity'], hour_data['dew'], hour_data['precip'], hour_data['precipprob'], hour_data['snow'],
hour_data['snowdepth'], hour_data['windgust'], hour_data['windspeed'], hour_data['winddir'], hour_data['pressure'],
hour_data['cloudcover'], hour_data['visibility'], hour_data['solarradiation'], hour_data['solarenergy'],
hour_data['uvindex'], hour_data.get('severerisk', 0), hour_data['conditions'], hour_data['icon'], station_hours,
hour_data.get('source', '')
))
conn.commit()
except sqlite3.OperationalError as e:
ERR(f"SQLite error occurred: {e}")
except Exception as e:
ERR(f"Unexpected error occurred: {e}")
raise
finally:
conn.close()
def get_weather_data(latitude: float, longitude: float, date: datetime):
if isinstance(date, str):
date = datetime.fromisoformat(date)
request_date_str = date.strftime("%Y-%m-%d")
with sqlite3.connect(DB) as conn:
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
cursor.execute('''
SELECT DW.*, D.date AS last_updated FROM Days D
JOIN DailyWeather DW ON D.daily_weather_id = DW.id
WHERE D.date = ?
''', (request_date_str,))
weather_data = cursor.fetchone()
if not weather_data:
return None
weather_dict = dict(weather_data)
cursor.execute('''
SELECT HW.* FROM HourlyWeather HW
JOIN Days D ON HW.day_id = D.id
WHERE D.date = ?
''', (request_date_str,))
hourly_data_rows = cursor.fetchall()
hourly_data = {
row['datetime']:
{
'temperature': row['temp'],
'conditions': row['conditions'],
'wind': f"{row['windspeed']}mph {row['winddir']}",
'feelslike': row['feelslike'],
'precip': row['precip'],
'precipprob': row['precipprob'],
'snow': row['snow'],
'humidity': row['humidity'],
'dew': row['dew'],
'uvindex': row['uvindex'],
'solarradiation': row['solarradiation'],
'icon': row['icon']
}
for row in hourly_data_rows
}
weather_dict['hours'] = hourly_data
return weather_dict
def get_daily_weather_data(date: datetime):
request_date_str = date.strftime("%Y-%m-%d")
with sqlite3.connect(DB) as conn:
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
cursor.execute('''
SELECT DW.* FROM Days D
JOIN DailyWeather DW ON D.daily_weather_id = DW.id
WHERE D.date = ?
''', (request_date_str,))
daily_weather_data = cursor.fetchone()
if daily_weather_data is not None:
return dict(daily_weather_data) # Convert sqlite3.Row object to dictionary
else:
return None
def get_hourly_weather_data(date: datetime):
request_date_str = date.strftime("%Y-%m-%d")
with sqlite3.connect(DB) as conn:
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
# Query HourlyWeather for the day (the original queried DailyWeather, a copy-paste slip)
cursor.execute('''
SELECT HW.* FROM HourlyWeather HW
JOIN Days D ON HW.day_id = D.id
WHERE D.date = ?
''', (request_date_str,))
hourly_weather_data = cursor.fetchall()
if hourly_weather_data:
return [dict(row) for row in hourly_weather_data]
else:
return None
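# Sketch of the read path above (hypothetical coordinates; get_weather falls back
# to the VisualCrossing API and stores the result when the local copy is missing,
# stale, or was fetched for a point more than 8 km away):
#   data = get_weather(45.52, -122.68, datetime(2024, 6, 1))
#   data['tempmax'], data['conditions']   # daily columns from DailyWeather
#   data['hours']                         # dict keyed by the HourlyWeather datetime column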