Initial commit
This commit is contained in:
commit
7ea0783076
62 changed files with 13964 additions and 0 deletions
82
.gitignore
vendored
Normal file
82
.gitignore
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
# Ignore specific data files and directories
|
||||
sijapi/data/calendar.ics
|
||||
sijapi/data/asr/
|
||||
sijapi/data/geocoder/
|
||||
sijapi/data/tts/
|
||||
sijapi/data/db/
|
||||
sijapi/data/*.pbf
|
||||
sijapi/data/geonames.txt
|
||||
sijapi/data/sd/images/
|
||||
sijapi/config/O365/
|
||||
|
||||
# Ignore all .env files
|
||||
**/.env
|
||||
|
||||
# Ignore all log files
|
||||
**/*.log
|
||||
**/logs/
|
||||
|
||||
# Ignore Python cache files
|
||||
**/__pycache__
|
||||
|
||||
# Ignore macOS system files
|
||||
**/.DS_Store
|
||||
|
||||
# Ignore all .ics files
|
||||
**/*.ics
|
||||
|
||||
# Ignore sync conflict files
|
||||
**/*sync-conflict*.*
|
||||
|
||||
# Ignore database files
|
||||
**/*.db
|
||||
|
||||
# Ignore large binary files
|
||||
**/*.mp3
|
||||
**/*.mp4
|
||||
**/*.wav
|
||||
|
||||
# Ignore compiled Python files
|
||||
**/*.pyc
|
||||
|
||||
# Ignore Jupyter Notebook checkpoints
|
||||
**/.ipynb_checkpoints/
|
||||
|
||||
# Ignore virtual environment directories
|
||||
venv/
|
||||
env/
|
||||
.venv/
|
||||
|
||||
# Ignore IDE-specific files
|
||||
.vscode/
|
||||
.idea/
|
||||
|
||||
# Ignore temporary files
|
||||
*~
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# Ignore compiled files
|
||||
*.com
|
||||
*.class
|
||||
*.dll
|
||||
*.exe
|
||||
*.o
|
||||
*.so
|
||||
|
||||
# Ignore packages
|
||||
*.7z
|
||||
*.dmg
|
||||
*.gz
|
||||
*.iso
|
||||
*.jar
|
||||
*.rar
|
||||
*.tar
|
||||
*.zip
|
||||
|
||||
# Ignore OS generated files
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
# Ignore .gitignore itself (optional)
|
||||
#.gitignore
|
481
README.md
Normal file
481
README.md
Normal file
|
@ -0,0 +1,481 @@
|
|||
```
|
||||
#──────────────────────────────────────────────────────────────────────────────────
|
||||
# C O N F I G U R A T I O N F I L E
|
||||
#──────────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# Hi friend! You've found my hidden .config.YAML-example file. Do you like
|
||||
# old-school ASCII art? I bet you do. So listen, this'll be your method for
|
||||
# configuring sijapi, and nothing works until you at least:
|
||||
#
|
||||
# (1) fill in the ESSENTIALS category, and
|
||||
#
|
||||
# (2) rename this file `.config.yaml`
|
||||
#
|
||||
# ... and even then, certain features will not work until you set other
|
||||
# relevant variables below.
|
||||
#
|
||||
# So get yourself a beverage, put on some sick beats, and settle in for a vibe-y
|
||||
# configuration sesh. Remember to read my detailed notes if you ever feel lost,
|
||||
# and most important, remember:
|
||||
#
|
||||
# you are NOT alone,
|
||||
# I love you SO much,
|
||||
# and you are SO worthy. <3
|
||||
#
|
||||
# y o u r b f & b f 4 e , †
|
||||
# .x+=:. . .
|
||||
# z` ^% @88> .. @88>
|
||||
# . <k %8P 888> .d`` %8P
|
||||
# .@8Ned8" . "8P u @8Ne. .u .
|
||||
# .@^%8888" .@88u . us888u. %8888:u@88N .@88u
|
||||
# x88: `)8b. ''888E` u888u. .@88 "8888" `888I 888. ''888E`
|
||||
# ~ 8888N=*8888 888E `'888E 9888 9888 888I 888I 888E
|
||||
# %8" R88 888E 888E 9888 9888 888I 888I 888E
|
||||
# @8Wou 9% 888E 888E 9888 9888 uW888L 888' 888E
|
||||
# .888888P` 888& 888E 9888 9888 '*88888Nu88P 888&
|
||||
# ` ^"F R888" 888E "888*""888" ~ '88888F` R888"
|
||||
# "" 888E ^Y" ^Y' 888 ^ ""
|
||||
# 888E *8E
|
||||
# 888P '8> † biggest fan
|
||||
# .J88" " " and best
|
||||
# friend 4 e v e r
|
||||
#
|
||||
# B U T I H E A R Y O U :
|
||||
# L E T ' S T A K E I T S L O W A N D
|
||||
# ────────────── S T A R T W I T H T H E ──────────────
|
||||
#
|
||||
# ███████╗███████╗███████╗███████╗███╗ ██╗████████╗██╗ █████╗ ██╗ ███████╗
|
||||
# ██╔════╝██╔════╝██╔════╝██╔════╝████╗ ██║╚══██╔══╝██║██╔══██╗██║ ██╔════╝
|
||||
# █████╗ ███████╗███████╗█████╗ ██╔██╗ ██║ ██║ ██║███████║██║ ███████╗
|
||||
# ██╔══╝ ╚════██║╚════██║██╔══╝ ██║╚██╗██║ ██║ ██║██╔══██║██║ ╚════██║
|
||||
# ███████╗███████║███████║███████╗██║ ╚████║ ██║ ██║██║ ██║███████╗███████║
|
||||
# ╚══════╝╚══════╝╚══════╝╚══════╝╚═╝ ╚═══╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝
|
||||
# ─────────────────────────────────────────────────────────────────
|
||||
#
|
||||
#─── first, bind an ip address and port : ──────────────────────────────────────────
|
||||
HOST_NET=0.0.0.0
|
||||
HOST_PORT=4444
|
||||
BASE_URL=https://api.sij.ai
|
||||
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# HOST_NET† and HOST_PORT comprise HOST and determine the ip and port the server binds to.
|
||||
# BASE_URL is used to assemble URLs, e.g. in the MS authentication flow and for serving images generated on the sd router.
|
||||
# BASE_URL should match the base URL used to access sijapi sans endpoint, e.g. http://localhost:4444 or https://api.sij.ai
|
||||
#
|
||||
# † Take care here! Please ensure you understand the implications of setting HOST_NET to anything besides 127.0.0.1, and configure your firewall and router appropriately if you do. Setting HOST_NET to 0.0.0.0, for instance, opens sijapi to any device the server running it is accessible to — including potentially frightening internet randos (depending how your firewall, router, and NAT are configured).
|
||||
#
|
||||
# Here are a few options to consider to more securely enable access from
|
||||
# other devices:
|
||||
#
|
||||
# (1) if all access can occur over Tailscale, either:
|
||||
# (a) leave HOST_NET set to 127.0.0.1, run `tailscale cert $(tailscale
|
||||
# whois $(tailscale ip | head -n 1) | awk '/Name:/ {print $2}')`
|
||||
# if you haven't already issued yourself a TLS certificate on
|
||||
# Tailscale, and then run `tailscale serve --bg --https=4443
|
||||
# 4444` to expose sijapi to your other tailscale-enabled devices
|
||||
# at `https://{device.magicdns-domain.net}:4443`; or
|
||||
# (b) set HOST_NET to your server's Tailscale IP (this should work
|
||||
# but for me doesn't reliably)
|
||||
#
|
||||
# (2) if WAN access truly is required, leave HOST_NET set to 127.0.0.1 and
|
||||
# configure either:
|
||||
# (a) a Cloudflare tunnel, or
|
||||
# (b) a reverse proxy with HTTPS (Caddy is excellent for this).
|
||||
#
|
||||
# And please be sure to set a strong API key either way but especially for (2).
|
||||
# ──────────
|
||||
#
|
||||
#──── configure API key authorization and select exemptions────────────────────────
|
||||
GLOBAL_API_KEY=your_global_api_key_here
|
||||
PUBLIC_SERVICES=/id,/ip,/health,/img/,/cl/dockets,/cl/search,/cd/alert
|
||||
TRUSTED_SUBNETS=127.0.0.1/32,10.13.37.0/24,100.64.64.0/24
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# GLOBAL_API_KEY determines the API key that will be required to access all endpoints, except access to PUBLIC_SERVICES or from TRUSTED_SUBNETS. Authentication is made via an `Authorization: Bearer {GLOBAL_API_KEY}` header.
|
||||
# TRUSTED_SUBNETS might commonly include 127.0.0.1/32 (localhost), 100.x.x.0/24 (Tailscale tailnet), and/or 192.168.x.0/24 or 10.x.x.0/24 (local network).
|
||||
# When configuring a reverse proxy or Cloudflare tunnel, please verify traffic through it does not appear to sijapi (i.e. in ./logs) as though it were coming from any of the subnets specified here. For sij, using Caddy, it does not, but your setup may differ.
|
||||
# ──────────
|
||||
#
|
||||
#─── router selection: ────────────────────────────────────────────────────────────
|
||||
ROUTERS=asr,llm,health,hooks,locate,note,sd,serve,summarize,time,tts,weather
|
||||
UNLOADED=auth,calendar,cf,email,ig
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# ROUTERS determines which routers are loaded.†
|
||||
# UNLOADED is not used directly -- it's just there to help keep track which routers are disabled.
|
||||
#
|
||||
# † ┓ ┏ orth bearing in mind: some routers inherently rely on other routers,
|
||||
# ┃┃┃ 3rd party APIs, or other apps being installed locally. If a router is
|
||||
# ┗┻┛ set to load (i.e. is included in ROUTERS) and depends on another router,
|
||||
# that other router will also load irrespective of whether it's listed.
|
||||
#
|
||||
# But let's get down to brass tacks, shall we?
|
||||
#
|
||||
# asr: requires faster_whisper — $ pip install faster_whisper — and
|
||||
# downloading the model file specified in ASR_DEFAULT_MODEL.
|
||||
#
|
||||
# auth: authenticates a Microsoft 365 account (for email & calendar).
|
||||
#
|
||||
# calendar: requires (1) a Microsoft 365 account with a properly configured
|
||||
# Azure Active Directory app, and/or (2) Calendars on macOS.
|
||||
#
|
||||
# cf: interfaces with the Cloudflare API and Caddy to register new
|
||||
# [sub-]domains on Cloudflare and deploy them with Caddy as
|
||||
# reverse proxy.
|
||||
#
|
||||
# llm: requires ollama — $ pip install ollama — and downloading the
|
||||
# models set in LLM_DEFAULT_MODEL and LLM_VISION_MODEL.
|
||||
#
|
||||
# email: designed for accessing Protonmail via Protonmail Bridge and/or
|
||||
# Microsoft 365, but should work with any IMAP/SMTP email account.
|
||||
#
|
||||
# hooks: designed for two specific use cases: monitoring court dockets
|
||||
# through CourtListener.org, and monitoring arbitrary web pages for
|
||||
# changes in tandem with a self-hosted changedetection.io instance.
|
||||
# Both require accounts; other functionality would require
|
||||
# additional / modified code.
|
||||
#
|
||||
# ig: requires an Instagram account, with credentials and other settings
|
||||
# configured separately in the ig_config.json file; relies heavily
|
||||
# on the llm and sd routers which have their own dependencies.
|
||||
#
|
||||
# locate: some endpoints work as is, but the core location tracking
|
||||
# functionality requires Postgresql + PostGIS extension and is
|
||||
# designed specifically to pair with a mobile device where
|
||||
# Pythonista is installed and configured to run the
|
||||
# `gps_tracker.py` and `gps_upload.py` scripts periodically or per
|
||||
# repeating condition (e.g. via automation under Apple Shortcuts).
|
||||
#
|
||||
# note: designed for use with Obsidian plus the Daily Notes and Tasks
|
||||
# core extensions; and the Admonitions, Banners, Icons (with the
|
||||
# Lucide pack), and Make.md community extensions. Moreover `notes`
|
||||
# relies heavily on the calendar, llm, locate, sd, summarize, time,
|
||||
# tts, and weather routers and accordingly on the external
|
||||
# dependencies of each.
|
||||
#
|
||||
# sd: requires ComfyUI plus any modules and StableDiffusion models
|
||||
# set in sd_config and individual workflow .json files.
|
||||
#
|
||||
# summarize: relies on the llm router and thus requires ollama.
|
||||
#
|
||||
# time: requires the subscription-based macOS app 'Timing' (one of many
|
||||
# apps that together make SetApp an incredible value for macOS users!)
|
||||
#
|
||||
# tts: designed for use with coqui — $ pip install coqui — and/or the
|
||||
# ElevenLabs API.
|
||||
#
|
||||
# weather: requires a VisualCrossing API key and is designed for (but doesn't
|
||||
# itself strictly require) Postgresql with the PostGIS extension;
|
||||
# (... but it presently relies on the locate router, which does).
|
||||
#
|
||||
#
|
||||
# ... Whew! that was a lot, right? I'm so glad we're in this together...
|
||||
# ──────────
|
||||
#
|
||||
#─────────────────────── Y O U ' R E G O N N A L O V E ────────────────────────
|
||||
#
|
||||
# ░ ░░ ░░ ░ ░░░░░░░░ ░░░ ░░░ ░░ ░░░░░░░ ░
|
||||
# ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒
|
||||
# ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓▓▓▓▓ ▓▓ ▓▓▓▓▓▓▓ ▓▓▓▓ ▓ ▓▓▓▓▓▓▓ ▓▓▓
|
||||
# ████ ████ ████ ████ █████████████ █ ████ █ █ ███████ ███████
|
||||
# ████ ████ ████ █ █ ██ ███ ██ ████ █ █ █
|
||||
#
|
||||
# A N D I ' M N O T. E V E N. J E A L O U S.
|
||||
# Y O U D E S E R V E I T A L L , B A B Y C A K E S.
|
||||
#
|
||||
#─── use tailscale for secure remote access: ───────────────────────────────────────
|
||||
TS_IP=100.64.64.20
|
||||
TS_SUBNET=100.64.64.0/24
|
||||
TS_ID=sij-mbp16
|
||||
TS_TAILNET=starling-sailfin
|
||||
TAILSCALE_API_KEY=your_tailscale_api_key_here
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# TS_IP should match the Tailscale IP of the device. But this is deprecated, and if the functionality becomes relevant again, it should come back in the form of a dynamic check (`tailscale status` in a shell subprocess) in __init__.py or even the /id endpoint.
|
||||
# TS_SUBNET should match the IP/CIDR-format tailnet
|
||||
# TS_ID currently has two roles: it's used to assemble the complete MagicDNS of the server, and it determines what the /id endpoint on the health router returns. This is relevant where multiple servers run the script behind a load balancer (e.g. Caddy), as a means to check which server responds. Bear in mind that /id is NOT API key-protected by default here.
|
||||
# TS_TAILNET should match the tailnet's MagicDNS domain (omitting the `.net`, for reasons)
|
||||
# ──────────
|
||||
#
|
||||
#────────────── U & M E ── W E C A N G E T T H R O U G H ──────────────────
|
||||
#
|
||||
# ██▓███ ▒█████ ██████ ▄▄▄█████▓ ▄████ ██▀███ ▓█████ ██████
|
||||
# ▓██░ ██▒██▒ ██▒▒██ ▒ ▓ ██▒ ▓▒ ██▒ ▀█▒▓██ ▒ ██▒▓█ ▀ ▒██ ▒
|
||||
# ▓██░ ██▓▒██░ ██▒░ ▓██▄ ▒ ▓██░ ▒░▒██░▄▄▄░▓██ ░▄█ ▒▒███ ░ ▓██▄
|
||||
# ▒██▄█▓▒ ▒██ ██░ ▒ ██▒░ ▓██▓ ░ ░▓█ ██▓▒██▀▀█▄ ▒▓█ ▄ ▒ ██▒
|
||||
# ▒██▒ ░ ░ ████▓▒░▒██████▒▒ ▒██▒ ░ ░▒▓███▀▒░██▓ ▒██▒░▒████▒▒██████▒▒
|
||||
# ▒██▒ ░ ░ ▒░▒░▒░ ▒ ▒▓▒ ▒ ░ ▒ ░░ ░▒ ▒ ░ ▒▓ ░▒▓░░░ ▒░ ░▒ ▒▓▒ ▒ ░
|
||||
# ▒▓▒░ ░ ▒ ▒░ ░ ░▒ ░ ░ ░ ░ ░ ░▒ ░ ▒░ ░ ░ ░░ ░▒ ░ ░
|
||||
# ░▒ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░░ ░ ░ ░ ░ ░
|
||||
# ░░ ░ ░ T O G E T H ░ R . ░ ░ ░ ░ ░
|
||||
# ░
|
||||
#─── for weather and locate modules: ───────────── J U S T H O L D M Y H A N D .
|
||||
DB=sij
|
||||
# R E A L T I G H T.
|
||||
DB_HOST=127.0.0.1
|
||||
DB_PORT=5432
|
||||
# U G O T T H I S , K ?
|
||||
DB_USER=sij
|
||||
DB_PASS='your_db_password_here'
|
||||
# Y E A H . . .
|
||||
DB_SSH=100.64.64.15
|
||||
# * J U S T L I K E T H A T . *
|
||||
DB_SSH_USER=sij
|
||||
DB_SSH_PASS='your_ssh_password_here'
|
||||
#─── notes: ────────────────────────────────────────────────── S E E ? E Z - P Z
|
||||
#
|
||||
# DB, DB_HOST, DB_PORT, DB_USER, and DB_PASS should specify those respective
|
||||
# credentials for your Postgres database. DB_SSH and associated _USER and _PASS
|
||||
# variables allow database access over an SSH tunnel.
|
||||
#
|
||||
# In the current implementation, we rely on Postgres to hold:
|
||||
# i. user-logged location data (locate module), and
|
||||
# ii. results from past weather forecast checks (weather module).
|
||||
#
|
||||
# A future version will hopefully make use of PostGIS's geocoding capabilities,
|
||||
# and add a vector database for the LLM module. Until then it's up to you if the
|
||||
# locate and weather modules are worth the hassle of maintaining Postgres.
|
||||
# ──────────
|
||||
#
|
||||
#
|
||||
#───── Y O U C A N S I T T H I S O N E) O U T B A B E , ────────<3─────────
|
||||
# ( ( ( I F Y O U ' D )
|
||||
# ))\( ( /(( L I K E . . . ( (
|
||||
# ( (()/(( /((_)\ )\())),----,.
|
||||
# )\((__ ))\( ()) |__))((_)- ))((,' ,'
|
||||
# ,' , `. /((_)\(_) / / '. |(_)|_ ,' .'
|
||||
# ,-+-,.' _ | / / '. / ../ ; ,---. ,----.' .'
|
||||
# ,-+-. ; , || | : /`. / \ ``\ .`- ' / \ | | .'
|
||||
# ,--.'|' | ;|; | |--` \___\/ \ : / / ' : : |--,
|
||||
# | | ,', | ':| : ;_ \ : | . ' / : | ;.' \
|
||||
# | | / | | || \ \ `. / / / ' / ; | | |
|
||||
# ' | : | : |, `----. \ ) \ \ \ | : \ `----'.'\ ;
|
||||
# ; . | ; |--' )(__ \ \ | ((__ / : |; | ``. __ \ . |
|
||||
# | : | | , / /`--' / /)\(/\ / :' ; \ / /\/ / :
|
||||
# | : ' |/ '--'. / / ,,/ ',- .' | .\ |/ ,,/ ',- .
|
||||
# ; | |`-' `--'---' \ ''\ ; | : '; :\ ''\ ;
|
||||
# | ;/ O R , Y U P , \ \ .' \ \ / \ \ .'
|
||||
# '---'B U R N I T A L L D O W N.-`-,,-' `---`--` `--`-,-'
|
||||
# Y O U H A V E A
|
||||
# G O D D E S S O F D E S T R U C T I O N W I T H I N ,
|
||||
# A N D T H A T I S S O V A L I D !!
|
||||
#─── ms365 (calendars): ──────────────────────────────────────────────────────────────
|
||||
MS365_TOGGLE=False
|
||||
ICAL_TOGGLE=True
|
||||
ICALENDARS=3CCC9C7B-BFF0-4850-9CE9-BC504859CBC6,E68FE085-2ECA-4097-AF0A-8D38C404D8DA,AB5A0473-16DD-4916-BD6D-6CB916726605
|
||||
MS365_CLIENT_ID=ce8cbd24-f146-4dc7-8ee7-51d9b69dec59
|
||||
MS365_TENANT_ID=bad78048-a6e0-47b1-a24b-403c444aa349
|
||||
MS365_SECRET=your_ms365_client_secret_here
|
||||
MS365_THUMBPRINT=4CD86699A8B675411EE9C971CB2783E11F9E52CB
|
||||
MS365_SCOPE=basic,calendar_all
|
||||
MS365_TOKEN_FILE=oauth_token.txt
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# MS365_CLIENT_ID, _TENANT_ID, _SECRET, and _SCOPE must be obtained from Microsoft
|
||||
# via the Azure portal, by creating a new app registration and an accompanying secret.
|
||||
# MS365_THUMBPRINT is a vestige of an earlier failed attempt to get this working, and
|
||||
# for now is deprecated. I recommend seeking out a well-reviewed tutorial for
|
||||
# creating an app on Azure with a client_id and secret and necessary scopes for
|
||||
# individual calendar access, because I had one heck of a time trying various approaches.
|
||||
# Do better, Microsoft.
|
||||
# ──────────
|
||||
#
|
||||
#──────────────────────────────── I B E T Y O U ──────────────────────────────────
|
||||
# R E C E I V E A L O T O F L O V E L E T T E R S O V E R
|
||||
#
|
||||
# .----------------. .----------------. .----------------. .----------------.
|
||||
# | .--------------. | .--------------. | .--------------. | .--------------. |
|
||||
# | | _____ | | | ____ ____ | | | __ | | | ______ | |
|
||||
# | | |_ _| | | ||_ \ / _|| | | / \ | | | |_ __ \ | |
|
||||
# | | | | | | | | \/ | | | | / /\ \ | | | | |__) | | |
|
||||
# | | | | | | | | |\ /| | | | | / ____ \ | | | | ___/ | |
|
||||
# | | _| |_ | | | _| |_\/_| |_ | | | _/ / \ \_ | | | _| |_ | |
|
||||
# | | |_____| | | ||_____||_____|| | ||____| |____|| | | |_____| | |
|
||||
# | | | | | | | | | | | | |
|
||||
# | '--------------' | '--------------' | '--------------' | '--------------' |
|
||||
# '----------------' '----------------' '----------------' '----------------'
|
||||
#
|
||||
# E M A I L
|
||||
#
|
||||
#─── imap & smtp: ────────────────────────────────────────────────────────────────────────
|
||||
IMAP_HOST=127.0.0.1
|
||||
EMAIL_ADDRESS='sij@sij.law'
|
||||
EMAIL_PASSWORD='your_email_password_here'
|
||||
IMAP_PORT=1143
|
||||
IMAP_ENCRYPTION=STARTTLS
|
||||
SMTP_PORT=1025
|
||||
SMTP_ENCRYPTION=SSL
|
||||
#─── notes: ───────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# This is primarily for summarizing incoming emails. Any IMAP account should work, but
|
||||
# I focused testing on a somewhat complex setup involving Protonmail Bridge.
|
||||
# ──────────
|
||||
#
|
||||
#──────────────────────────────── G E T S I L L Y ────────────────────────────────────
|
||||
# T H E N G O B O N K E R S
|
||||
# W I T H Y O U R O W N
|
||||
#
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓██████▒▓██████▒░
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
# ░▒▓████████▓▒ ░▒▓████████▓▒ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
#
|
||||
# ( F O R R E A L T H O U G H — T H E S E
|
||||
#─── via ollama (llm): A R E S O H O T R I G H T N O W )
|
||||
LLM_URL=http://localhost:11434
|
||||
SYSTEM_MSG=You are a helpful AI assistant.
|
||||
DEFAULT_LLM=dolphin-mistral
|
||||
DEFAULT_VISION=llava-llama3
|
||||
SUMMARY_MODEL=dolphin-mistral
|
||||
SUMMARY_CHUNK_SIZE=4000
|
||||
SUMMARY_CHUNK_OVERLAP=100
|
||||
SUMMARY_TPW=1.3
|
||||
SUMMARY_LENGTH_RATIO=4
|
||||
SUMMARY_MIN_LENGTH=150
|
||||
SUMMARY_TOKEN_LIMIT=4096
|
||||
SUMMARY_INSTRUCT='You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the summary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.'
|
||||
SUMMARY_INSTRUCT_TTS='You are an AI assistant that provides email summaries for Sanjay -- nothing more and nothing less. You must not include ANY extraneous text other than the summary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary. Your response will undergo Text-To-Speech conversion and be added to Sanjays private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following.'
|
||||
DEFAULT_VOICE=Luna
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# The exact values here will depend on what software you are using to inference an LLM,
|
||||
# and of course what models and capabilities are available through it. The script was
|
||||
# designed for use with `ollama`, but most of the functionality should be equal with
|
||||
# LM Studio, LocalAI, etc...
|
||||
#
|
||||
# DEFAULT_LLM is self-explanatory; DEFAULT_VISION is used for image recognition within
|
||||
# a multimodal chat context, such as on the ig module for generating intelligible
|
||||
# comments to Instagram posts, or more realistic captions for sd-generated images.
|
||||
#
|
||||
# Note it's possible to specify a separate model for general purposes and for
|
||||
# summarization tasks. The other SUMMARY_ variables call for some explanation,
|
||||
# in particular six that are most relevant when summarizing very long documents:
|
||||
#
|
||||
# SUMMARY_CHUNK_SIZE: determines the maximum length, in tokens, of the pieces that are
|
||||
# split and sent individually to the model.
|
||||
#
|
||||
# SUMMARY_CHUNK_OVERLAP: determines how much of each chunk is overlapped with the prior
|
||||
# and next chunks. Set too high causes repetition, set too low
|
||||
# causes confusion and poor summary results.
|
||||
# The summarization algorithm is flawed but I've gotten the best
|
||||
# results with this set around 100–200.
|
||||
#
|
||||
# SUMMARY_TPW: used in estimating the token count of a prompt for purposes of
|
||||
# complying with the maximum tokens a model can handle at once.
|
||||
# Best you can do is estimate. I tend to use long words fairly
|
||||
# excessively and found my average was 1.3 tokens per word. YMMV.
|
||||
#
|
||||
# SUMMARY_LENGTH_RATIO: this is the primary control over the length of generated
|
||||
# summaries, expressed as the ratio of original text length to
|
||||
# summary length. The default, 4, means the summaries will be
|
||||
# around 1/4 the length of the original text you provide it.
|
||||
#
|
||||
# SUMMARY_MIN_LENGTH: the default SUMMARY_LENGTH_RATIO of 4 isn't ideal for very
|
||||
# short texts, but setting it any lower sacrifices conciseness
|
||||
# in summaries of longer texts. In short one size doesn't fit
|
||||
# all. The compromise I landed on was to set a "maximum minimum"
|
||||
# summary length: under no circumstances will the script impose
|
||||
# a smaller maximum length than this value.
|
||||
#
|
||||
# SUMMARY_INSTRUCT: sets the prompt used when summarizing text.
|
||||
#
|
||||
# SUMMARY_INSTRUCT_TTS: sets a separate prompt for use when summarizing text where
|
||||
# tts output was requested; tends to yield "cleaner" audio
|
||||
# with fewer numbers (page numbers, citations) and other
|
||||
# information extraneous to spoken contexts.
|
||||
#
|
||||
# DEFAULT_VOICE: used for all tts tasks when a specific voice is not requested.
|
||||
# ──────────
|
||||
#
|
||||
#────,-_/────────── W E C A N E X P E R I M E N T W I T H ──────────.───────────
|
||||
# ' | ,~-,-. ,-. ,-. ,--. | --' ,--. ,-. ,--. ,-. ,-. |-- . ,-. ,-.
|
||||
# .^ | | | | ,--| | | | --' | -,- | --' | | | --' | ,--| | | | | | |
|
||||
# `--' ' ' ' `-^ `-| `--' `---| `--' ' ' `--' ' `--^ `' ` `-' ' '
|
||||
# , | ,-. | ~ I N T H E N U D E . ~
|
||||
# `~~' `-+'
|
||||
# O R F U L L Y C L O T H E D ── U P T O Y O U
|
||||
#
|
||||
#─── via comfyui (stable diffusion): ───── ( B U T L E T M E K N O W , Y E A H ? )
|
||||
COMFYUI_URL=http://localhost:8188
|
||||
COMFYUI_DIR=/Users/sij/workshop/sd/ComfyUI
|
||||
PHOTOPRISM_USER=NOT_IMPLEMENTED
|
||||
PHOTOPRISM_PASS=NOT_IMPLEMENTED
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# COMFYUI_URL, as you may expect, should point to the URL you use to access ComfyUI. If you
|
||||
# don't know, watch for it in the server logs once ComfyUI is fully launched.
|
||||
#
|
||||
# COMFYUI_DIR, with similar self-evidence, should point to the base directory of your
|
||||
# ComfyUI installation (i.e. the folder that contains `models`, `inputs`, and `outputs`)
|
||||
#
|
||||
# PhotoPrism integration is not yet implemented, so don't bother with that just yet.
|
||||
# ──────────
|
||||
#
|
||||
# D O N ' T M I S S O N E ───────────────────────────────────────
|
||||
#\ F I N A L S M A T T E R I N G O F M I S C E L L A N Y \
|
||||
# \ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
|
||||
# \ _ _ _/\\\\_ _ _ _ _ _ /\\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
|
||||
# \ _ _ \/\\\\\\_ _ _ _ /\\\\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
|
||||
# \ _ _ \/\\\//\\\_ _ /\\\//\\\ _ _/\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
|
||||
# \ _ _ \/\\\\///\\\/\\\/ \/\\\ _ _///_ _ _/\\\\\\\\\\_ _ _ _/\\\\\\\\_ _\
|
||||
# \ _ _ \/\\\ _\///\\\/ _ \/\\\ _ _/\\\ _ \/\\\////// _ _ _/\\\////// _ _\
|
||||
# \ _ _ \/\\\ _ _\/// _ _ \/\\\ _ _/\\\ _ \/\\\\\\\\\\_ _ /\\\_ _ _ _ _ _\
|
||||
# \ _ _ \/\\\ _ _ _ _ _ _ \/\\\ _ _/\\\ _ \////////\\\_ _\//\\\ _ _ _ _ _\
|
||||
# \ _ _ \/\\\ _ _ _ _ _ _ \/\\\ _ _/\\\ _ _/\\\\\\\\\\_ _ \///\\\\\\\\_ _\
|
||||
# \ _ _ \///_ _ _ _ _ _ _ \///_ _ _///_ _ \////////// _ _ _ \//////// _ _\
|
||||
# \ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
|
||||
# ─────────────────── A N D O T H E R W H A T - H A V E - Y O U S ──
|
||||
#
|
||||
#─── other needful API keys, mainly: ────────────────────────────────────────────────────
|
||||
CF_TOKEN=your_cloudflare_token_here
|
||||
VISUALCROSSING_API_KEY=your_visualcrossing_api_key_here
|
||||
ELEVENLABS_API_KEY=your_elevenlabs_api_key_here
|
||||
COURTLISTENER_BASE_URL=https://www.courtlistener.com
|
||||
COURTLISTENER_API_KEY=your_courtlistener_api_key_here
|
||||
TIMING_API_URL=https://web.timingapp.com/api/v1
|
||||
TIMING_API_KEY=your_timing_api_key_here
|
||||
MAC_ID=sij-mbp16
|
||||
MAC_UN=sij
|
||||
MAC_PW="your_mac_password_here"
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
#
|
||||
# CF_TOKEN: a Cloudflare token. This is used on the cf router for quick
|
||||
# deployment of new domains in tandem with Caddy and for ddns.
|
||||
#
|
||||
# VISUALCROSSING_API_KEY: used for obtaining weather forecasts. It is a very data-rich
|
||||
# yet affordable source of weather info, with a generous free
|
||||
# plan.
|
||||
#
|
||||
# ELEVENLABS_API_KEY: used when on the tts router if tts tasks are outsourced to
|
||||
# the state-of-the-art models at ElevenLabs.
|
||||
#
|
||||
# COURTLISTENER_API_KEY: used primarily on the hooks router, but likely relevant only
|
||||
# to legal professionals that will be aware what it is for.
|
||||
#
|
||||
# TIMING_API_URL: are used on the time router for generating various tasks
|
||||
# & related to timekeeping, as well as on the notes router for
|
||||
# TIMING_API_KEY: generating markdown-formatted timeslips. It requires an
|
||||
# active subscription to the Timing app (macOS or web), but
|
||||
# it's worth noting comes included in the SetApp subscription
|
||||
# bundle, for the same price, last I checked, as subscribing to
|
||||
# Timing alone. If you have a Mac and somehow don't know this
|
||||
# already, SetApp is an utterly insane value. I pay $15/mo for
|
||||
# apps that I would otherwise pay ~$100/mo for if subscribing
|
||||
# individually. I want to say I wasn't paid to say this, but
|
||||
# with those savings I almost feel like I was.
|
||||
#
|
||||
# MAC_ID: These last three variables are for a specific use case where
|
||||
# MAC_UN: you want certain commands run, or alerts appearing, on a
|
||||
# MAC_PW: designated macOS computer. The alerts router is designed to
|
||||
# deliver OS-level notifications to the specified Mac when a
|
||||
# webhook gets a hit on specified keywords within the payload.
|
||||
# Setting the MAC_ID to the TS_ID of the target Mac, allows
|
||||
# the script to readily know whether it itself is the target
|
||||
# (this is relevant in a load-balancing context), and how to
|
||||
# reach the target if not — to wit, ssh using MagicDNS.
|
||||
```
|
47
requirements.txt
Normal file
47
requirements.txt
Normal file
|
@ -0,0 +1,47 @@
|
|||
python-dotenv
|
||||
setuptools
|
||||
PyPDF2
|
||||
fastapi
|
||||
pdf2image
|
||||
pdfminer
|
||||
pytesseract
|
||||
python-dateutil
|
||||
python-docx
|
||||
hypercorn
|
||||
starlette
|
||||
httpx
|
||||
pydantic
|
||||
pytz
|
||||
requests
|
||||
aiohttp
|
||||
paramiko
|
||||
tailscale
|
||||
pandas
|
||||
pydub
|
||||
torch
|
||||
selenium
|
||||
webdriver_manager
|
||||
faster_whisper
|
||||
filetype
|
||||
html2text
|
||||
markdown
|
||||
ollama
|
||||
aiofiles
|
||||
bs4
|
||||
imbox
|
||||
newspaper3k
|
||||
python-magic
|
||||
urllib3
|
||||
whisper
|
||||
huggingface_hub
|
||||
numpy
|
||||
tqdm
|
||||
tiktoken
|
||||
numba
|
||||
scipy
|
||||
vectordb
|
||||
IPython
|
||||
torchaudio
|
||||
lxml
|
||||
lxml_html_clean
|
||||
pdfminer.six
|
61
setup.py
Normal file
61
setup.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
from setuptools import setup, find_packages
|
||||
|
||||
# Package metadata, console entry point and runtime dependencies for sijapi.
setup(
    name='sijapi',
    version='0.1',
    packages=find_packages(),
    entry_points={
        'console_scripts': [
            # Installs a `sijapi` command that invokes sijapi.__main__.main.
            'sijapi = sijapi.__main__:main',
        ],
    },
    install_requires=[
        'fastapi',
        'python-dotenv',
        'hypercorn',
        'setuptools',
        'PyPDF2',
        'pdf2image',
        # Only pdfminer.six is listed (further down): the separate `pdfminer`
        # distribution installs the same top-level `pdfminer` package, so
        # requiring both makes the installed files depend on install order.
        'pytesseract',
        'python-dateutil',
        'python-docx',
        'starlette',
        'httpx',
        'pydantic',
        'pytz',
        'requests',
        'aiohttp',
        'paramiko',
        'tailscale',
        'pandas',
        'pydub',
        'torch',
        'selenium',
        'webdriver_manager',
        'faster_whisper',
        'filetype',
        'html2text',
        'markdown',
        'ollama',
        'aiofiles',
        'bs4',
        'pdfminer.six',
        # Listed in requirements.txt as well; kept in sync here.
        'lxml',
        'lxml_html_clean',
        'imbox',
        'newspaper3k',
        'python-magic',
        'urllib3',
        'whisper',
        'huggingface_hub',
        'numpy',
        'tqdm',
        'tiktoken',
        'numba',
        'scipy',
        'vectordb',
        'IPython',
        'torchaudio',
    ],
)
|
||||
|
92
sij.asc
Normal file
92
sij.asc
Normal file
|
@ -0,0 +1,92 @@
|
|||
-----BEGIN PGP PUBLIC KEY BLOCK-----
|
||||
|
||||
mQINBGY+fL4BEADCpz8FAfa6/7i9mEQCYlwwP2k9DlrUzz+u9BL4BmuoTEcGty9M
|
||||
7EA2ivRxXo371IIMjL/GyAa8I3WHMEhxuRlGldUQaHzo6PicTn+OiLJ/g2vCfStN
|
||||
jIYog3WC25P7Es1n1hDuOu8rUL93twXZ4NevgYx+G44M7Q+/1AbSXf83kpawlHhg
|
||||
HcGmH2vt9UulfTGAvN9s2sH2pn89812lpWLSdPARNw09ePZy4RdiEgJ6t+S+wjaE
|
||||
Ue/H4FcQC1MLrQnkW5soUOduY9HN0iUk/xZqqkRQctl3ds5oInE483vQsL0HKFvs
|
||||
MB8lBdXTbVzxvpFe+fvT8d6hiZ/YgxIUEl1KZLDd3atqj+UREuG+LABZUKC4nSUP
|
||||
EXneXUqi4qVCW9827K9/H+IKahe8OE+OrZAsSfLtsp4AznIxgyQbvpUZzCuRASJN
|
||||
Kt1cjcJBOv5L0HJ8tVykZd23WuKUXiyxTs1MxsDGyjew30IsAg4WNO/iw9vBO/Yu
|
||||
pfjlZTcgbghdIuNmOrnCyzKWtUxxfDtWwEBBshbTKusOaGhauBaHrRVE7lKlTblM
|
||||
x1JIzYBziDmFy25J1XvYb3guilk1yy54poLQaEcE54mQYWHKCNS4eQeL5dJR3Nmu
|
||||
Pt9GXdMyNO3uyog3WYpyYqch+osbBsHFVNUClxMycnyqZzHQeZHPNJBzJwARAQAB
|
||||
tC5TYW5neWUgSW5jZS1Kb2hhbm5zZW4gKEF0dG9ybmV5KSA8c2lqQHNpai5sYXc+
|
||||
iQJXBBMBCABBAhsDBQkHhh8tBQsJCAcCBhUKCQgLAgQWAgMBAh4BAheAFiEEMjqK
|
||||
LEezdiJLNhO3U1smWu2+W0QFAmY+fPUCGQEACgkQU1smWu2+W0SwBQ/+L5S1fIop
|
||||
6iQ/6gQENBNCUVgACWP0/ViJzQGo4iF3UZkV5KV8pgk/TenZSXCLxUj6UpSAe25m
|
||||
vtrGV4NCL2hLn1NPK11Na6IM1ykfh/L67NKeCqmtQYwNLwW0o0fvUpK9fahPxhmv
|
||||
EFo+lVCabQndgzmLxnUhxH4qkGSejsaSFoJQ6fVl/DExCL4w/R5rStnRMKDtkuF1
|
||||
ONfjZpuLrAylx8Ypf/rocQYn5AJcRD5ZL2bGgDZNe85VNBFmD3b2cGSVpm3J6Rg/
|
||||
fPfs1lgtpgXWbBDCF8nRY326Utbr3qoeZUXVQjVZ05Q2SpUYFHiDZJ3EFwQikg5n
|
||||
cIBfcXQZQhTq/OK0eS0vB1li8m1ce9m8iMC+Pxe5toPkxFV5RO1+o5PG1SyOfzfV
|
||||
F1c0O9JQqdJzRHoTuqLtVhlmRVBU2d6TjWYlZ6TwPShSTLu0Tkm4EeFJS4oag75d
|
||||
q7LlIIvrWS4n3CqVpC/PEIUtclytkOkvNQaSWHEVkappS3UjkX1BJmaI8zXYh9jh
|
||||
sV/5FckvwYnky+w6geFOBs34NW0rg9oNw4KNAywYcOPbI/Ev1z57my+MpA5msw+B
|
||||
ww9sFC+tzQCSJl0FU2Dg2YMnyqfUtGr9HfXdAGuuUVh+cYFmEdwwZqBWl37pNIGL
|
||||
SxfF1AdrlHCSpJcLVETe80UraMFAI7tyOwe0L1Nhbmd5ZSBJbmNlLUpvaGFubnNl
|
||||
biA8c2FuZ3llaWpAd2VzdGVybmxhdy5vcmc+iQJUBBMBCAA+FiEEMjqKLEezdiJL
|
||||
NhO3U1smWu2+W0QFAmY+fOgCGwMFCQeGHy0FCwkIBwIGFQoJCAsCBBYCAwECHgEC
|
||||
F4AACgkQU1smWu2+W0RlnBAArwaFta9NTRdubTqctv1EET1D9OXAE/R5vdSk2jRQ
|
||||
1CMYmv6KeMm0Rl7+dNFet/vJOEtITF7TZHnt7WBy7n5m+SIoARsaZYEchjZKsE2g
|
||||
6RvRWqFGYuUYQWTRKsw0b2tT16BaNLKdV/w3ndRQNS6wDJrW1dRnIWxm4z26d3/H
|
||||
Rt3o8+LUVxdSWGLliKZU00S+FNPVSwWe/X7+CoIE7T5XZL+OIEJ6DfpK2pkHKT6D
|
||||
FswF3KOLG36vz5eISk4AT+o9AEoFIpX0hce3DMixEYQSgKN230K8RchC59bO81zE
|
||||
w7Mic4vpn/wKFhicn+0BA1aJzzOd8iEwiA0p5baq4b2xIwCBiO4uv/HXR1SN1Tfk
|
||||
QozjAGzl8LzrmwGTWOtOSk/7ckPhPR2MGNhMdtJ7rPeHxImJLh+/f4uBmYnQUdw4
|
||||
0j3sMpJmrShW5dXJ8YHqVFfqabYD8HkBztdYI0qGJDpQjEbW6V+DvMWQXOZ8c1ul
|
||||
NN2vZyY25RkypMQLiphImJa+q6eGtBEas40MeAkgQKIBPBBpb6W1km+m6UnOADKB
|
||||
0/vOWcZMgijyMPp7WvwXbOwmXI27rHsUTvhFDLPI113a9I5bU8j6VyW2s/sst3Xc
|
||||
OQDzEgR3KvD4dWjczIg6yliIq9eM5hskpsYyfDfWRWrIbR3Tg8XPwnQRB9dPEHIy
|
||||
rKS0KVNhbmd5ZSBJbmNlLUpvaGFubnNlbiA8c2FuZ3llQHJpc2V1cC5uZXQ+iQJU
|
||||
BBMBCAA+FiEEMjqKLEezdiJLNhO3U1smWu2+W0QFAmY+fQYCGwMFCQeGHy0FCwkI
|
||||
BwIGFQoJCAsCBBYCAwECHgECF4AACgkQU1smWu2+W0SKGA//VRGpS7IwOOlHF7OI
|
||||
+LEMDebLpLB2PswfWrK+sI9YdXXV/CaT0NcCz2HPCoK+coaDkl4cxh10ykVbjO36
|
||||
wZc/rvhpzga2wMLpBLNwpTvSlfMwsCQeRQay498bgdR59gf1hYa/dPYKKrBgNxHa
|
||||
Kc3dMDWU0adpV4zV1s/iFNQQZfmhUah+8TTlB03hahPzn8V7CqQF+jTfSXiWPv/V
|
||||
eD1W6Sc1juvLTVxTThbM5ewiIhMP2t7KM+M4viOEqce79IcE2HTcpCaEI7Lh/Eld
|
||||
9VBZZk/gENuPqyQuLbOIOQhC6LYRZkZC9Vv1FDutfWV5ZBPyaTY/n5pGW3lo+Tfa
|
||||
FLSamQcD6dyiGm/ZyQbPUDt2aWhqRGr7VvvtfyXLazL9T9Y6ASr5UjLakPr5ihUz
|
||||
B8InRch9ACPbu7QSIGFk9PQgHme2Cd/HMRLIALnkAmrafgDE+14Rlp9qI2nYhWdD
|
||||
jkZcLalPXQCDBxUfj1q192Nn3wlKsDkDd2RWT7Mc2RJq2FR36KADPMtz2oJPSib4
|
||||
eRgI40E9Wv+zqHDDTU2K/bLi3nmBHvKnXWXPyiBPVL+CAoAhkYHHJwNuRQfxlukq
|
||||
heS4/CMBRB04foTeu2ltl6/sQdAIyBGKbOC6fMyhJFYbi16nWI6j7iw2XQnqyitu
|
||||
jC8Pz14NfIAQTpKCVcV32Kn2k1+0I1Nhbmd5ZSBJbmNlLUpvaGFubnNlbiA8c2lq
|
||||
QGVudi5lc3E+iQJUBBMBCAA+FiEEMjqKLEezdiJLNhO3U1smWu2+W0QFAmY+fRIC
|
||||
GwMFCQeGHy0FCwkIBwIGFQoJCAsCBBYCAwECHgECF4AACgkQU1smWu2+W0Rbxw/+
|
||||
OMYnlyXvo146+3M6JGdvW36CWmc9ZcmaU+xJM3FnG91WNo5J8MnHl0Ks9BwjNWtm
|
||||
VJgFEdi2EVpSLJnYdQyJILCNt8RAclYvbFHYUOIDEEC2yr5ZKt/odwYAXPxaqQ4O
|
||||
Sj7R2GbLA52O8zGWfARBAnAQycrlBRjItdpzGeWgRST8O/ot/IkU7xsAKW72E2VB
|
||||
9jlCahp5c01lEideVqzVhk3z6GzVz1NUKsglgEOmTIjld4mMs+4GX/93q0u1erKO
|
||||
I7Q6RL6lfdc2opGi5jFMXGWhLLgX2SSsBFJRuSQGnTpbx3XWFS5uA+cku7Fh0fC0
|
||||
MKr2vsY18Z6OqU0MdQm6ovIVcvhzIdGfnBU9Ct98DMiUhDCmx3o9XneWj1n7kWKM
|
||||
gT8s8AvE27tidtkZApwIKHdUy6qfyqwRjxE+KdL6Eh48x3TVYep+wfSfPJ1eq9Ne
|
||||
7WWXKUx6FGNH01hpQdTLbCYqmwMa03och1wwyi+0wc8rHe6k6y2tURtP3mINkDeV
|
||||
u1QmVaGRDA2r7oDm9UsFeupGsbFBnTkQIfJgnrLRJFfN2FDJPZDcd/VS71AOSL5C
|
||||
jY+Dr/WHYPWeN8MHXfG4r/P41wsrnAJEAzSvLRQ9GYCLPe825W+uDJx9eMePodFa
|
||||
BeIBcM633WXpbIXHnRQhPDfTzejCejO6GoPE7PbtBBi5Ag0EZj58vgEQAPUqNOgQ
|
||||
kAPd/S+nkGAfvnzC5UD6lVWaQTVL/xU2K1J8l11K5Ck4nq7oMKhzEitu0lA+jG7q
|
||||
JVwXMj9+rnoGlbIYmmxCZYpSit930Mss3HjYU8IAF4nybGwc5+wO77cldk3WJTI0
|
||||
EkFgiM4Jk6Gk/tRf1LgMIfJIUgm8MooPLqg2z5Pj+bbwxw42A20enEwtF3ivEETJ
|
||||
wuJwsp5uCOAfzOGqqBvp19PMTPynUBuwEXCkJfb0CCz+5yhjoi6ZjCVXxjuoe2wN
|
||||
jFwoYd8odfSuvC6Fh9qqXnjF7HZLxEyN7K1L/y/sWarsN01zbUUI3kZlnTuamDu4
|
||||
LdZtl2q3QqDyxmzHIWLTa1qL0s3WooB7JJqBYaNmQjLHadoktZ4vfhl7kjXYsg+i
|
||||
84oipL83u2cRHplpqnRk9qVwNdW01EObjNafWY6t3942sM4e/yOdQiaXlxivPuHV
|
||||
VYwme6K53lmGcV3ipMWRpNkme+oKV/TdYTTdlDaLgC8ga5AW6poNoSp5UpNeOs0E
|
||||
mxIZivpRQSCr3g+jScy0RdX/+tI1gWe+2ZIHFwR+1WsXvLXHyd1wVyH4vDxSf1bE
|
||||
VRVsXLZDT/xMGDzNzAC76kzoIykrcndFiTbNzB/LjZJuls6fRdN07bTcymWEKYiP
|
||||
Ia6iGdag6+ueoX4eDzbjCvldKtkfr/EhB7MfABEBAAGJAjwEGAEIACYWIQQyOoos
|
||||
R7N2Iks2E7dTWyZa7b5bRAUCZj58vgIbDAUJB4YfLQAKCRBTWyZa7b5bRLZdEACk
|
||||
AaXNVeywC9+X6bdwkKV5Jl6Hv238cGd58TuVbjd+tii1JazbKEqCAr5tTlGtrUZg
|
||||
fyjM0z5sMKDSZ15paX4xDbDs+xdfMxLVdjmFlZgwTrrTSIx3ODxPo/sSeyrzGZrQ
|
||||
hlZjOHP1Bvln0OTQwK0yE3Eaip0FhIpJA5FX3yrZfvza3St5leNOXsZgEri68cgf
|
||||
mVhS9tBD2I9TpCVwgq5vRnloAMgtQBYr8N9glXBfs2WsPhU96HSSH88osJW+lCkG
|
||||
vTtzQBEjnnSQ/ssHBYz4DfpsJe1fbM+9WVow6q2nkUhqg5TfdAt4H0ra2uPXnNz8
|
||||
lvQObVHlw7T0w5UTzgBdlCyYplyTG2gcZi+UWzit6YH9DH82j1otcq3+3NlrKwo0
|
||||
TSJKZNagiqgJNZ1mhJQTt3JDacFFkBBxLf6trruuyInRU1leo87hzHCxIlMbQPqh
|
||||
ogtV+W9FHElVJwoTQi8YF+0AacZPzK8wJmlPLxBeqs+ULJ8H5wZxlEBB1Jj91/W9
|
||||
6R8m2IUZCsXNNpYU+f7uB8x0RUS3pU8S7GcwdJmOa16Xc4VdfWugm4TTEtajeSYC
|
||||
ek5j/2s/QkAum5slT2Y6Aam0Jj/IhsGHKVEnR6DS01mZqVeeu0giPFUO4ZX5C0n9
|
||||
mAmw/ZUGIOj6ls3KMBHv4pqQI7nd00tW8eIMgKGgKQ==
|
||||
=PhPl
|
||||
-----END PGP PUBLIC KEY BLOCK-----
|
241
sijapi/__init__.py
Normal file
241
sijapi/__init__.py
Normal file
|
@ -0,0 +1,241 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
import ipaddress
|
||||
import multiprocessing
|
||||
from dotenv import load_dotenv
|
||||
from dateutil import tz
|
||||
from pathlib import Path
|
||||
from pydantic import BaseModel
|
||||
import traceback
|
||||
import logging
|
||||
from . import logs
|
||||
|
||||
### Logs ###
|
||||
HYPERCORN_LOG_LEVEL = None
|
||||
LOGGER = logging.getLogger('LOGGER')
|
||||
def DEBUG(d):
    """Emit ``d`` on the shared ``LOGGER`` at DEBUG level."""
    LOGGER.debug(d)
|
||||
def INFO(i):
    """Emit ``i`` on the shared ``LOGGER`` at INFO level."""
    # Must go through .info(): routing it through .debug() drops the message
    # whenever the logger is configured above DEBUG.
    LOGGER.info(i)
|
||||
def WARN(w):
    """Emit ``w`` on the shared ``LOGGER`` at WARNING level."""
    LOGGER.warning(w)
|
||||
def ERR(e):
    """Log ``e`` at ERROR level, followed by the active traceback if there is one.

    The traceback is appended only while an exception is being handled.
    Outside an ``except`` block ``traceback.format_exc()`` returns the
    placeholder ``"NoneType: None"``, which would only add noise to the log.
    """
    import sys  # local import: needed only for the exc_info() check

    LOGGER.error(e)
    if sys.exc_info()[0] is not None:
        LOGGER.error(traceback.format_exc())
|
||||
def CRITICAL(c):
    """Log ``c`` at CRITICAL level, followed by the active traceback if there is one.

    The traceback is appended only while an exception is being handled.
    Outside an ``except`` block ``traceback.format_exc()`` returns the
    placeholder ``"NoneType: None"``, which would only add noise to the log.
    """
    import sys  # local import: needed only for the exc_info() check

    LOGGER.critical(c)
    if sys.exc_info()[0] is not None:
        LOGGER.critical(traceback.format_exc())
|
||||
|
||||
# from sijapi.config.config import load_config
|
||||
# cfg = load_config()
|
||||
|
||||
### Initial initialization
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
CONFIG_DIR = BASE_DIR / "config"
|
||||
ENV_PATH = CONFIG_DIR / ".env"
|
||||
load_dotenv(ENV_PATH)
|
||||
|
||||
### API essentials
|
||||
ROUTERS = os.getenv('ROUTERS', '').split(',')
|
||||
PUBLIC_SERVICES = os.getenv('PUBLIC_SERVICES', '').split(',')
|
||||
GLOBAL_API_KEY = os.getenv("GLOBAL_API_KEY")
|
||||
# HOST_NET and HOST_PORT comprise HOST, which is what the server will bind to
|
||||
HOST_NET = os.getenv("HOST_NET", "127.0.0.1")
|
||||
HOST_PORT = int(os.getenv("HOST_PORT", 4444))
|
||||
HOST = f"{HOST_NET}:{HOST_PORT}"
|
||||
LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
|
||||
SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
|
||||
TRUSTED_SUBNETS = [ipaddress.ip_network(subnet.strip()) for subnet in os.getenv('TRUSTED_SUBNETS', '127.0.0.1/32').split(',')]
|
||||
MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count())
|
||||
|
||||
### Directories & general paths
|
||||
HOME_DIR = Path.home()
|
||||
ROUTER_DIR = BASE_DIR / "routers"
|
||||
DATA_DIR = BASE_DIR / "data"
|
||||
os.makedirs(DATA_DIR, exist_ok=True)
|
||||
ALERTS_DIR = DATA_DIR / "alerts"
|
||||
os.makedirs(ALERTS_DIR, exist_ok=True)
|
||||
LOGS_DIR = BASE_DIR / "logs"
|
||||
os.makedirs(LOGS_DIR, exist_ok=True)
|
||||
REQUESTS_DIR = LOGS_DIR / "requests"
|
||||
os.makedirs(REQUESTS_DIR, exist_ok=True)
|
||||
REQUESTS_LOG_PATH = LOGS_DIR / "requests.log"
|
||||
|
||||
|
||||
### Databases
|
||||
DB = os.getenv("DB", 'sijdb')
|
||||
DB_HOST = os.getenv("DB_HOST", "127.0.0.1")
|
||||
DB_PORT = os.getenv("DB_PORT", 5432)
|
||||
DB_USER = os.getenv("DB_USER", 'sij')
|
||||
DB_PASS = os.getenv("DB_PASS")
|
||||
DB_SSH = os.getenv("DB_SSH", "100.64.64.15")
|
||||
DB_SSH_USER = os.getenv("DB_SSH_USER")
|
||||
# Read DB_SSH_PASS (same naming as DB_SSH_USER); DB_SSH_ENV is still honoured
# as a fallback so .env files written for the earlier variable name keep working.
DB_SSH_PASS = os.getenv("DB_SSH_PASS") or os.getenv("DB_SSH_ENV")
|
||||
DB_URL = f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB}'
|
||||
|
||||
|
||||
### LOCATE AND WEATHER LOCALIZATIONS
|
||||
USER_FULLNAME = os.getenv('USER_FULLNAME')
|
||||
USER_BIO = os.getenv('USER_BIO')
|
||||
TZ = tz.gettz(os.getenv("TZ", "America/Los_Angeles"))
|
||||
HOME_ZIP = os.getenv("HOME_ZIP") # unimplemented
|
||||
LOCATION_OVERRIDES = DATA_DIR / "loc_overrides.json"
|
||||
LOCATIONS_CSV = DATA_DIR / "US.csv"
|
||||
# DB = DATA_DIR / "weatherlocate.db" # deprecated
|
||||
VISUALCROSSING_BASE_URL = os.getenv("VISUALCROSSING_BASE_URL", "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline")
|
||||
VISUALCROSSING_API_KEY = os.getenv("VISUALCROSSING_API_KEY")
|
||||
|
||||
|
||||
### Obsidian & notes
|
||||
OBSIDIAN_VAULT_DIR = Path(os.getenv("OBSIDIAN_BASE_DIR") or HOME_DIR / "Nextcloud" / "notes")
|
||||
OBSIDIAN_JOURNAL_DIR = OBSIDIAN_VAULT_DIR / "journal"
|
||||
OBSIDIAN_RESOURCES_DIR = "obsidian/resources"
|
||||
OBSIDIAN_BANNER_DIR = f"{OBSIDIAN_RESOURCES_DIR}/banners"
|
||||
os.makedirs(Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_BANNER_DIR, exist_ok=True)
|
||||
OBSIDIAN_BANNER_SCENE = os.getenv("OBSIDIAN_BANNER_SCENE", "wallpaper")
|
||||
OBSIDIAN_CHROMADB_COLLECTION = os.getenv("OBSIDIAN_CHROMADB_COLLECTION", "obsidian")
|
||||
DOC_DIR = DATA_DIR / "docs"
|
||||
os.makedirs(DOC_DIR, exist_ok=True)
|
||||
|
||||
### DATETIME SCHEMA FOR DAILY NOTE FOLDER HIERARCHY FORMATTING ###
|
||||
YEAR_FMT = os.getenv("YEAR_FMT")
|
||||
MONTH_FMT = os.getenv("MONTH_FMT")
|
||||
DAY_FMT = os.getenv("DAY_FMT")
|
||||
DAY_SHORT_FMT = os.getenv("DAY_SHORT_FMT")
|
||||
|
||||
### Large language model
|
||||
LLM_URL = os.getenv("LLM_URL", "http://localhost:11434")
|
||||
LLM_SYS_MSG = os.getenv("SYSTEM_MSG", "You are a helpful AI assistant.")
|
||||
SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
|
||||
SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
|
||||
DEFAULT_LLM = os.getenv("DEFAULT_LLM", "dolphin-mistral")
|
||||
DEFAULT_VISION = os.getenv("DEFAULT_VISION", "llava")
|
||||
DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "Luna")
|
||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
### Stable diffusion
|
||||
SD_IMAGE_DIR = DATA_DIR / "sd" / "images"
|
||||
os.makedirs(SD_IMAGE_DIR, exist_ok=True)
|
||||
SD_WORKFLOWS_DIR = DATA_DIR / "sd" / "workflows"
|
||||
os.makedirs(SD_WORKFLOWS_DIR, exist_ok=True)
|
||||
COMFYUI_URL = os.getenv('COMFYUI_URL', "http://localhost:8188")
|
||||
# os.getenv() returns None when COMFYUI_DIR is unset and Path(None) raises
# TypeError, which would abort the import of this module. Fall back to a path
# under the home directory, using the same `getenv(...) or default` pattern as
# OBSIDIAN_VAULT_DIR. The default mirrors SD.DIR in config/.config.yaml.
COMFYUI_DIR = Path(os.getenv('COMFYUI_DIR') or HOME_DIR / "workshop" / "sd" / "ComfyUI")
|
||||
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output'
|
||||
COMFYUI_LAUNCH_CMD = os.getenv('COMFYUI_LAUNCH_CMD', 'mamba activate comfyui && python main.py')
|
||||
|
||||
### Summarization
|
||||
SUMMARY_CHUNK_SIZE = int(os.getenv("SUMMARY_CHUNK_SIZE", 4000)) # measured in tokens
|
||||
SUMMARY_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", 100)) # measured in tokens
|
||||
SUMMARY_TPW = float(os.getenv("SUMMARY_TPW", 1.3)) # measured in tokens
|
||||
SUMMARY_LENGTH_RATIO = int(os.getenv("SUMMARY_LENGTH_RATIO", 4)) # measured as original to length ratio
|
||||
SUMMARY_MIN_LENGTH = int(os.getenv("SUMMARY_MIN_LENGTH", 150)) # measured in tokens
|
||||
SUMMARY_INSTRUCT = os.getenv("SUMMARY_INSTRUCT", "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
|
||||
SUMMARY_MODEL = os.getenv("SUMMARY_MODEL", "llama3")
|
||||
SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 4096))
|
||||
|
||||
### ASR
|
||||
ASR_DIR = DATA_DIR / "asr"
|
||||
os.makedirs(ASR_DIR, exist_ok=True)
|
||||
WHISPER_CPP_DIR = HOME_DIR / str(os.getenv("WHISPER_CPP_DIR"))
|
||||
WHISPER_CPP_MODELS = os.getenv('WHISPER_CPP_MODELS', 'NULL,VOID').split(',')
|
||||
|
||||
### TTS
|
||||
PREFERRED_TTS = os.getenv("PREFERRED_TTS", "None")
|
||||
TTS_DIR = DATA_DIR / "tts"
|
||||
os.makedirs(TTS_DIR, exist_ok=True)
|
||||
VOICE_DIR = TTS_DIR / 'voices'
|
||||
os.makedirs(VOICE_DIR, exist_ok=True)
|
||||
PODCAST_DIR = TTS_DIR / "sideloads"
|
||||
os.makedirs(PODCAST_DIR, exist_ok=True)
|
||||
TTS_OUTPUT_DIR = TTS_DIR / 'outputs'
|
||||
os.makedirs(TTS_OUTPUT_DIR, exist_ok=True)
|
||||
TTS_SEGMENTS_DIR = TTS_DIR / 'segments'
|
||||
os.makedirs(TTS_SEGMENTS_DIR, exist_ok=True)
|
||||
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
|
||||
|
||||
### Calendar & email account
|
||||
MS365_TOGGLE = True if os.getenv("MS365_TOGGLE") == "True" else False
|
||||
ICAL_TOGGLE = True if os.getenv("ICAL_TOGGLE") == "True" else False
|
||||
ICS_PATH = DATA_DIR / 'calendar.ics' # deprecated now, but maybe revive?
|
||||
ICALENDARS = os.getenv('ICALENDARS', 'NULL,VOID').split(',')
|
||||
from typing import Optional  # needed for the nullable fields of IMAP_DETAILS


class IMAP_DETAILS(BaseModel):
    """Settings for the mail account: credentials, host, ports and encryption modes."""

    email: str
    password: str
    host: str
    imap_port: int
    smtp_port: int
    # Declared Optional so that an explicit None -- which is what os.getenv()
    # yields when the variable is unset -- is accepted by validation instead of
    # being rejected as "not a string".
    imap_encryption: Optional[str] = None
    smtp_encryption: Optional[str] = None
|
||||
|
||||
IMAP = IMAP_DETAILS(
|
||||
email = os.getenv('IMAP_EMAIL'),
|
||||
password = os.getenv('IMAP_PASSWORD'),
|
||||
host = os.getenv('IMAP_HOST', '127.0.0.1'),
|
||||
imap_port = int(os.getenv('IMAP_PORT', 1143)),
|
||||
smtp_port = int(os.getenv('SMTP_PORT', 469)),
|
||||
imap_encryption = os.getenv('IMAP_ENCRYPTION', None),
|
||||
smtp_encryption = os.getenv('SMTP_ENCRYPTION', None)
|
||||
)
|
||||
AUTORESPONSE_WHITELIST = os.getenv('AUTORESPONSE_WHITELIST', '').split(',')
|
||||
AUTORESPONSE_BLACKLIST = os.getenv('AUTORESPONSE_BLACKLIST', '').split(',')
|
||||
AUTORESPONSE_BLACKLIST.extend(["no-reply@", "noreply@", "@uscourts.gov", "@doi.gov"])
|
||||
# Free-text context for automatic replies; None when the variable is unset.
AUTORESPONSE_CONTEXT = os.getenv('AUTORESPONSE_CONTEXT', None)
# Auto-response is enabled exactly when a context has been configured.
# Identity comparison is the idiomatic (and override-proof) test for None.
AUTORESPOND = AUTORESPONSE_CONTEXT is not None
|
||||
|
||||
### Courtlistener & other webhooks
|
||||
COURTLISTENER_DOCKETS_DIR = DATA_DIR / "courtlistener" / "dockets"
|
||||
os.makedirs(COURTLISTENER_DOCKETS_DIR, exist_ok=True)
|
||||
COURTLISTENER_SEARCH_DIR = DATA_DIR / "courtlistener" / "cases"
|
||||
os.makedirs(COURTLISTENER_SEARCH_DIR, exist_ok=True)
|
||||
CASETABLE_PATH = DATA_DIR / "courtlistener" / "cases.json"
|
||||
COURTLISTENER_API_KEY = os.getenv("COURTLISTENER_API_KEY")
|
||||
COURTLISTENER_BASE_URL = os.getenv("COURTLISTENER_BASE_URL", "https://www.courtlistener.com")
|
||||
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
|
||||
|
||||
### Keys & passwords
|
||||
PUBLIC_KEY_FILE = os.getenv("PUBLIC_KEY_FILE", 'you_public_key.asc')
|
||||
PUBLIC_KEY = (BASE_DIR.parent / PUBLIC_KEY_FILE).read_text()
|
||||
MAC_ID = os.getenv("MAC_ID")
|
||||
MAC_UN = os.getenv("MAC_UN")
|
||||
MAC_PW = os.getenv("MAC_PW")
|
||||
TIMING_API_KEY = os.getenv("TIMING_API_KEY")
|
||||
TIMING_API_URL = os.getenv("TIMING_API_URL", "https://web.timingapp.com/api/v1")
|
||||
PHOTOPRISM_URL = os.getenv("PHOTOPRISM_URL")
|
||||
PHOTOPRISM_USER = os.getenv("PHOTOPRISM_USER")
|
||||
PHOTOPRISM_PASS = os.getenv("PHOTOPRISM_PASS")
|
||||
|
||||
### Tailscale
|
||||
# Parse TS_IP only when it is set: ipaddress.ip_address() raises ValueError for
# a placeholder such as "NULL", which would abort the import of this module.
# Same "parse if present, else None" shape as TS_SUBNET.
TS_IP = ipaddress.ip_address(os.getenv("TS_IP")) if os.getenv("TS_IP") else None
|
||||
TS_SUBNET = ipaddress.ip_network(os.getenv("TS_SUBNET")) if os.getenv("TS_SUBNET") else None
|
||||
TS_ID = os.getenv("TS_ID", "NULL")
|
||||
TS_TAILNET = os.getenv("TS_TAILNET", "NULL")
|
||||
TS_ADDRESS = f"http://{TS_ID}.{TS_TAILNET}.ts.net"
|
||||
|
||||
### Cloudflare
|
||||
CF_API_BASE_URL = os.getenv("CF_API_BASE_URL")
|
||||
CF_TOKEN = os.getenv("CF_TOKEN")
|
||||
CF_IP = DATA_DIR / "cf_ip.txt" # to be deprecated soon
|
||||
CF_DOMAINS_PATH = DATA_DIR / "cf_domains.json" # to be deprecated soon
|
||||
|
||||
### Caddy - not fully implemented
|
||||
BASE_URL = os.getenv("BASE_URL")
|
||||
CADDY_SERVER = os.getenv('CADDY_SERVER', None)
|
||||
CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None
|
||||
CADDY_API_KEY = os.getenv("CADDY_API_KEY")
|
||||
|
||||
|
||||
### Microsoft Graph
|
||||
MS365_CLIENT_ID = os.getenv('MS365_CLIENT_ID')
|
||||
MS365_SECRET = os.getenv('MS365_SECRET')
|
||||
MS365_TENANT_ID = os.getenv('MS365_TENANT_ID')
|
||||
MS365_CERT_PATH = CONFIG_DIR / 'MS365' / '.cert.pem' # deprecated
|
||||
MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated
|
||||
MS365_KEY = MS365_KEY_PATH.read_text()
|
||||
MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt'
|
||||
MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT')
|
||||
|
||||
MS365_LOGIN_URL = os.getenv("MS365_LOGIN_URL", "https://login.microsoftonline.com")
|
||||
MS365_AUTHORITY_URL = f"{MS365_LOGIN_URL}/{MS365_TENANT_ID}"
|
||||
MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect")
|
||||
MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access').split(',')
|
||||
|
||||
### Maintenance
|
||||
GARBAGE_COLLECTION_INTERVAL = 60 * 60 # Run cleanup every hour
|
||||
GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours
|
133
sijapi/__main__.py
Executable file
133
sijapi/__main__.py
Executable file
|
@ -0,0 +1,133 @@
|
|||
#!/Users/sij/miniforge3/envs/api/bin/python
|
||||
from fastapi import FastAPI, Request, HTTPException, Response
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.requests import ClientDisconnect
|
||||
from hypercorn.asyncio import serve
|
||||
from hypercorn.config import Config
|
||||
import sys
|
||||
import asyncio
|
||||
import httpx
|
||||
import argparse
|
||||
import json
|
||||
import ipaddress
|
||||
import importlib
|
||||
from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from . import logs
|
||||
parser = argparse.ArgumentParser(description='Personal API.')
|
||||
parser.add_argument('--debug', action='store_true', help='Set log level to DEBUG')
|
||||
parser.add_argument('--test', type=str, help='Load only the specified module.')
|
||||
args = parser.parse_args()
|
||||
logs.setup("debug")
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
|
||||
from sijapi import HOST, ENV_PATH, GLOBAL_API_KEY, REQUESTS_DIR, ROUTER_DIR, REQUESTS_LOG_PATH, PUBLIC_SERVICES, TRUSTED_SUBNETS, ROUTERS
|
||||
|
||||
|
||||
# Initialize a FastAPI application
|
||||
api = FastAPI()
|
||||
|
||||
|
||||
# CORSMiddleware
|
||||
api.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=['*'],
|
||||
allow_credentials=True,
|
||||
allow_methods=['*'],
|
||||
allow_headers=['*'],
|
||||
)
|
||||
|
||||
class SimpleAPIKeyMiddleware(BaseHTTPMiddleware):
    """Require the global API key for requests from outside the trusted subnets.

    A request passes without a key when its path is listed in
    ``PUBLIC_SERVICES`` or its client address lies inside one of
    ``TRUSTED_SUBNETS``. Any other request must present ``GLOBAL_API_KEY``,
    either in the ``Authorization`` header (with or without a ``Bearer ``
    prefix) or as the ``api_key`` query parameter; otherwise it is answered
    with HTTP 401.
    """

    @staticmethod
    def _is_trusted(request: Request) -> bool:
        """Return True when the client address belongs to a trusted subnet."""
        try:
            client_ip = ipaddress.ip_address(request.client.host)
        except (AttributeError, ValueError):
            # No client information, or a host that is not an IP literal:
            # treat the caller as untrusted rather than failing the request.
            return False
        return any(client_ip in subnet for subnet in TRUSTED_SUBNETS)

    @staticmethod
    def _matches_key(candidate) -> bool:
        """Compare ``candidate`` with ``GLOBAL_API_KEY`` in constant time.

        Returns False when either side is missing, so an unset
        ``GLOBAL_API_KEY`` never grants access to a request without a key.
        """
        import hmac  # local import keeps the block self-contained

        if not candidate or not GLOBAL_API_KEY:
            return False
        return hmac.compare_digest(
            candidate.encode("utf-8"), GLOBAL_API_KEY.encode("utf-8")
        )

    async def dispatch(self, request: Request, call_next):
        """Authenticate ``request`` and forward it to ``call_next`` if allowed."""
        if request.method == "OPTIONS":
            # Allow CORS preflight requests without a key. They are forwarded
            # rather than answered here so that the CORS middleware registered
            # on the app can attach its headers.
            # NOTE(review): relies on CORSMiddleware sitting inside this
            # middleware in the stack -- confirm if registration order changes.
            return await call_next(request)

        if request.url.path not in PUBLIC_SERVICES and not self._is_trusted(request):
            header_key = request.headers.get("Authorization")
            if header_key:
                # Strip the scheme case-insensitively, but keep the token's
                # own case: keys are compared exactly.
                scheme, _, token = header_key.partition(" ")
                if scheme.lower() == "bearer":
                    header_key = token.strip()
            query_key = request.query_params.get("api_key")

            if not (self._matches_key(header_key) or self._matches_key(query_key)):
                WARN(f"Invalid API key provided by a requester.")
                return JSONResponse(
                    status_code=401,
                    content={"detail": "Invalid or missing API key"}
                )

        response = await call_next(request)
        # DEBUG(f"Request from {client_ip} is complete")
        return response
|
||||
|
||||
api.add_middleware(SimpleAPIKeyMiddleware)
|
||||
|
||||
canceled_middleware = """
|
||||
@api.middleware("http")
|
||||
async def log_requests(request: Request, call_next):
|
||||
DEBUG(f"Incoming request: {request.method} {request.url}")
|
||||
DEBUG(f"Request headers: {request.headers}")
|
||||
DEBUG(f"Request body: {await request.body()}")
|
||||
response = await call_next(request)
|
||||
return response
|
||||
|
||||
async def log_outgoing_request(request):
|
||||
INFO(f"Outgoing request: {request.method} {request.url}")
|
||||
DEBUG(f"Request headers: {request.headers}")
|
||||
DEBUG(f"Request body: {request.content}")
|
||||
"""
|
||||
|
||||
@api.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Log an ``HTTPException`` and the request that triggered it, then
    answer with a JSON body carrying the exception's status code and detail.
    """
    for message in (
        f"HTTP Exception: {exc.status_code} - {exc.detail}",
        f"Request: {request.method} {request.url}",
    ):
        ERR(message)

    body = {"detail": exc.detail}
    return JSONResponse(status_code=exc.status_code, content=body)
|
||||
|
||||
@api.middleware("http")
async def handle_exception_middleware(request: Request, call_next):
    """Contain the "Response content longer than Content-Length" RuntimeError.

    Any other ``RuntimeError`` is re-raised unchanged. For the Content-Length
    mismatch the error is logged and a JSON 500 response is returned.
    """
    try:
        return await call_next(request)
    except RuntimeError as exc:
        if str(exc) != "Response content longer than Content-Length":
            raise
        # call_next() raised, so no response object exists whose
        # Content-Length header could be corrected; touching `response` here
        # would only raise UnboundLocalError. Report the failure explicitly.
        ERR(f"Content-Length mismatch for {request.method} {request.url}: {exc}")
        return JSONResponse(
            status_code=500,
            content={"detail": "Internal server error"},
        )
|
||||
|
||||
|
||||
|
||||
def load_router(router_name):
    """Import ``sijapi.routers.<router_name>`` and mount its router on ``api``.

    The module must expose an attribute named like the module itself. A
    missing router file produces a warning; an ``ImportError`` or
    ``AttributeError`` while loading is logged as critical. Neither case
    raises.
    """
    router_file = ROUTER_DIR / f'{router_name}.py'
    DEBUG(f"Attempting to load {router_name.capitalize()}...")

    # Guard clause: nothing to import when the file is absent.
    if not router_file.exists():
        WARN(f"Router file for {router_name} does not exist.")
        return

    module_path = f'sijapi.routers.{router_name}'
    try:
        loaded = importlib.import_module(module_path)
        api.include_router(getattr(loaded, router_name))
        INFO(f"{router_name.capitalize()} router loaded.")
    except (ImportError, AttributeError) as e:
        CRITICAL(f"Failed to load router {router_name}: {e}")
|
||||
|
||||
def main(argv=None):
    """Load the configured routers and serve the API with Hypercorn.

    Args:
        argv: Accepted for backward compatibility and not used; the command
            line is parsed at import time into the module-level ``args``.
            It defaults to ``None`` because the ``sijapi`` console script
            declared in setup.py calls ``main()`` without arguments.
    """
    if args.test:
        # --test: load only the named router.
        load_router(args.test)
    else:
        CRITICAL(f"sijapi launched")
        # Log the parsed namespace itself, not the bound `_get_args` method.
        CRITICAL(f"{args}")
        for router_name in ROUTERS:
            # An unset ROUTERS variable splits to [''], and entries may carry
            # stray whitespace; skip blanks instead of probing for '.py'.
            router_name = router_name.strip()
            if router_name:
                load_router(router_name)

    config = Config()
    config.keep_alive_timeout = 1200
    config.bind = [HOST]
    asyncio.run(serve(api, config))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv[1:])
|
253
sijapi/config/.config.yaml
Normal file
253
sijapi/config/.config.yaml
Normal file
|
@ -0,0 +1,253 @@
|
|||
TZ: 'America/Los_Angeles'
|
||||
|
||||
API:
|
||||
BIND: 0.0.0.0
|
||||
PORT: 4444
|
||||
URL: https://api.sij.ai
|
||||
PUBLIC:
|
||||
- /id
|
||||
- /ip
|
||||
- /health
|
||||
- /img/
|
||||
- /cl/dockets
|
||||
- /cl/search
|
||||
- /cd/alert
|
||||
TRUSTED_SUBNETS:
|
||||
- 127.0.0.1/32
|
||||
- 10.13.37.0/24
|
||||
- 100.64.64.0/24
|
||||
ROUTER:
|
||||
asr: ON
|
||||
calendar: ON
|
||||
cf: OFF
|
||||
email: ON
|
||||
health: ON
|
||||
hooks: ON
|
||||
ig: OFF
|
||||
llm: ON
|
||||
locate: ON
|
||||
note: ON
|
||||
sd: ON
|
||||
serve: ON
|
||||
summarize: ON
|
||||
time: ON
|
||||
tts: ON
|
||||
weather: ON
|
||||
## DO NOT MODIFY: ##
|
||||
ENV: '{{ DIR.CONFIG }}/.env' # auto-configured
|
||||
KEYS: ['{{ SECRET.GLOBAL }}'] # sourced from .env
|
||||
|
||||
## DO NOT MODIFY: ##
|
||||
DIR:
|
||||
BASE: '{{ AUTO.BASE_DIR }}'
|
||||
ROUTERS: '{{ DIR.BASE }}/routers'
|
||||
DATA: '{{ DIR.BASE }}/data'
|
||||
CONFIG: '{{ DIR.BASE }}/config'
|
||||
LOGS: '{{ DIR.BASE }}/logs'
|
||||
ALERTS: '{{ DIR.DATA }}/alerts'
|
||||
REQUESTS: '{{ DIR.DATA }}/requests'
|
||||
REQUESTS_LOG: '{{ DIR.LOGS }}/requests.log'
|
||||
|
||||
|
||||
HOST:
|
||||
## DO NOT MODIFY: ##
|
||||
TS:
|
||||
IP: '{{ AUTO.IP }}' # auto-configured
|
||||
ID: '{{ AUTO.TS_ID }}' # auto-configured
|
||||
MAX_CPU_CORES: '{{ MAX_CPU_CORES }}' # auto-configured
|
||||
|
||||
|
||||
CADDY:
|
||||
TS:
|
||||
IP: 100.64.64.15
|
||||
ID: 'sij-namecheap-vps'
|
||||
CADDYFILE: '/etc/caddy/Caddyfile'
|
||||
LOGS: '/etc/caddy/logs'
|
||||
|
||||
|
||||
TS:
|
||||
ID: sij-mbp16
|
||||
IP: 100.64.64.20
|
||||
SUBNET: 100.64.64.0/24
|
||||
MDNS: starling-sailfin.ts.net
|
||||
## DO NOT MODIFY: ##
|
||||
API_KEY: '{{ SECRET.TS }}' # sourced from .env
|
||||
|
||||
DB:
|
||||
NAME: sij
|
||||
HOST: 127.0.0.1
|
||||
PORT: 5432
|
||||
USER: sij
|
||||
SSH_USER: sij
|
||||
## DO NOT MODIFY: ##
|
||||
SSH_PASS: '{{ SECRET.DB_SSH }}' # sourced from .env
|
||||
URL: 'postgresql://{{ DB.USER }}:{{ SECRET.DB }}@{{ DB.HOST }}:{{ DB.PORT }}/{{ DB.NAME }}' # auto-configured
|
||||
|
||||
OBSIDIAN:
|
||||
DAILY_NOTE:
|
||||
YEAR: '%Y'
|
||||
MONTH: '%Y-%m %B'
|
||||
DAY: '%Y-%m-%d %A'
|
||||
DAY_SHORT: '%Y-%m-%d'
|
||||
DIR: '{{ HOME_DIR }}/Nextcloud/notes' # you can specify the absolute path or use '{{ HOME_DIR }}' followed by a relative path
|
||||
|
||||
MS365:
|
||||
STATUS: OFF
|
||||
AUTH:
|
||||
TENANT: bad78048-a6e0-47b1-a24b-403c444aa349
|
||||
CLIENT_ID: ce8cbd24-f146-4dc7-8ee7-51d9b69dec59
|
||||
LOGIN: 'https://login.microsoftonline.com'
|
||||
REDIRECT: 'https://api.sij.ai/o365/oauth_redirect'
|
||||
SCOPES:
|
||||
- basic
|
||||
- calendar_all
|
||||
- Calendars.Read
|
||||
- Calendars.ReadWrite
|
||||
- offline_access
|
||||
## DO NOT MODIFY: ##
|
||||
SECRET: '{{ SECRET.M365_SECRET }}' # sourced from .env
|
||||
TOKEN_FILE: '{{ DIR.CONFIG }}/ms365/oauth_token.txt' # auto-configured
|
||||
|
||||
ICAL:
|
||||
STATUS: ON
|
||||
CALENDARS:
|
||||
- ''
|
||||
|
||||
EMAIL:
|
||||
STATUS: ON
|
||||
FULLNAME: 'Sangye Ince-Johannsen'
|
||||
IMAP:
|
||||
STATUS: ON
|
||||
EMAIL: 'sij@sij.law'
|
||||
HOST: 127.0.0.1
|
||||
PORT: 1142
|
||||
ENCRYPTION: STARTTLS
|
||||
## DO NOT MODIFY: ##
|
||||
PASSWORD: '{{ SECRET.IMAP }}' # sourced from .env
|
||||
SMTP:
|
||||
STATUS: ON
|
||||
EMAIL: 'sij@sij.law'
|
||||
HOST: 127.0.0.1
|
||||
PORT: 1024
|
||||
SMTP_ENCRYPTION: SSL
|
||||
PASSWORD: '{{ SECRET.SMTP }}' # sourced from .env
|
||||
SUMMARY:
|
||||
INSTRUCT: 'You are an AI assistant that provides email summaries for Sanjay -- nothing more and nothing less. You must not include ANY extraneous text other than the summary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary. Your response will undergo Text-To-Speech conversion and added to Sanjays private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following.'
|
||||
AUTORESPONSE:
|
||||
INSTRUCT: ''
|
||||
CONTEXT: 'he is taking a leave of absence until July 20, 2024 and will be unable to respond personally to emails received before then.'
|
||||
USER_BIO: 'a public interest environmental lawyer based in Eugene who works at the Western Environmental Law Center and specializes in Endangered Species Act litigation.'
|
||||
WHITELIST:
|
||||
- sangye@
|
||||
- sij@
|
||||
- singdancer@
|
||||
- singdancer8@
|
||||
- vanessa.nowitzky@
|
||||
- theo.killian@
|
||||
- singdancing
|
||||
- synchronicity
|
||||
BLACKLIST:
|
||||
- '@westernlaw'
|
||||
- pete
|
||||
- sarah
|
||||
- mcmillan
|
||||
- erik
|
||||
- schlenker
|
||||
- esg
|
||||
- sristi
|
||||
- cascadia
|
||||
- csnm
|
||||
- smwc
|
||||
- sodamtn@
|
||||
- '@cascadia'
|
||||
- oregonwild
|
||||
|
||||
TIMING:
|
||||
URL: https://web.timingapp.com/api/v1
|
||||
KEY: '{{ SECRET.TIMING }}'
|
||||
|
||||
LLM:
|
||||
STATUS: ON
|
||||
URL: http://localhost:11434
|
||||
SYS: 'You are a helpful AI assistant.'
|
||||
TPW: 1.3s
|
||||
CHAT:
|
||||
MODEL: dolphin-mistral
|
||||
VISION:
|
||||
MODEL: llava-llama3
|
||||
DEFAULT_SUMMARY: dolphin-mistral
|
||||
SUMMARY:
|
||||
MODEL: llama3
|
||||
CHUNK_SIZE: 4000
|
||||
CHUNK_OVERLAP: 100
|
||||
LENGTH_RATIO: 4
|
||||
MIN_LENGTH: 150
|
||||
TOKEN_LIMIT: 4096
|
||||
INSTRUCT: 'You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the summary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.'
|
||||
|
||||
|
||||
ASR:
|
||||
STATUS: ON
|
||||
DIR: 'whisper.cpp'
|
||||
MODELS:
|
||||
- small
|
||||
- base
|
||||
- base-en
|
||||
- tiny
|
||||
- medium
|
||||
- medium-en
|
||||
- large
|
||||
- large-v2
|
||||
- large-v3
|
||||
|
||||
TTS:
|
||||
STATUS: ON
|
||||
USE:
|
||||
DEFAULT: XTTS
|
||||
EMAIL: XTTS
|
||||
WEBCLIP: 11L
|
||||
RSS: XTTS
|
||||
XTTS:
|
||||
STATUS: ON
|
||||
DEFAULT_VOICE: '{{ CONFIG.XTTS_DEFAULT_VOICE }}'
|
||||
11L:
|
||||
DEFAULT_VOICE: '{{ CONFIG.ELEVENLABS_DEFAULT_VOICE }}'
|
||||
STATUS: '{{ STATUS.11L }}'
|
||||
API_KEY: '{{ SECRET.ELEVENLABS }}'
|
||||
|
||||
SD:
|
||||
START_COMMAND: '{{ CONFIG.COMFYUI_START_COMMAND }}'
|
||||
URL: '{{ CONFIG.COMFYUI_URL }}'
|
||||
DIR: '{{ HOME_DIR }}/workshop/sd/ComfyUI'
|
||||
CONFIG: '{{ CONFIG_DIR }}/sd_config.yaml'
|
||||
|
||||
CF:
|
||||
URL: 'https://api.cloudflare.com/client/v4'
|
||||
TOKEN: '{{ SECRET.CLOUDFLARE }}'
|
||||
|
||||
CL:
|
||||
URL: https://www.courtlistener.com
|
||||
API_KEY: '{{ SECRET.COURTLISTENER }}'
|
||||
DOCKETS: '{{ DIR.DATA }}/cl/dockets'
|
||||
SEARCHES: '{{ DIR.DATA }}/cl/searches'
|
||||
|
||||
VC:
|
||||
URL: 'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline'
|
||||
API_KEY: '{{ SECRET.VISUAL_CROSSING }}'
|
||||
|
||||
SSH:
|
||||
PRIVATE_KEY: '{{ SECRET.SSH_PRIVATE }}'
|
||||
PUBLIC_KEY: '{{ SECRET.SSH_PUBLIC }}'
|
||||
MAC_HOST: '{{ SECRET.MAC_HOST }}'
|
||||
MAC_UN: '{{ SECRET.MAC_USERNAME }}'
|
||||
MAC_PW: '{{ SECRET.MAC_PASSWORD }}'
|
||||
|
||||
PGP:
|
||||
PRIVATE_KEY: '{{ SECRET.PGP_PRIVATE }}'
|
||||
PUBLIC_KEY: '{{ SECRET.PGP_PUBLIC }}'
|
||||
|
||||
CREATE_DIRS:
|
||||
- '{{ DIR.LOGS }}'
|
||||
- '{{ DIR.ALERTS }}'
|
||||
- '{{ DIR.REQUESTS }}'
|
||||
- '{{ DIR.DOCKETS }}'
|
28
sijapi/config/MS365/.cert.key
Normal file
28
sijapi/config/MS365/.cert.key
Normal file
|
@ -0,0 +1,28 @@
|
|||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCikW67UW0RpncJ
|
||||
h4Ha9HumZ/WzgEZWRWkgksVJIOJ8t1PftctizLUlz+xMWNl+Volp4crxnPpnczis
|
||||
pOXU4g65XoFHHpF9nhF/3YDgxo5BDEM/mIIKEO9LFkIBQVBdE85qXnIVot5MfuNj
|
||||
HeyEs7doRMBxilOSR/DkT8bTWu7m5yeHlF58iYVOxxssGhP3bo7CcAcaZD1LJBnP
|
||||
Df+UBqzWQ9as903p5bFixHy2kVz8Qkd5k5tyIQ/tXqlhRfLLHG4AYHmBbS06CAg0
|
||||
nEpKUeQx4l1J/ykAjQTwhHf70xv1z0p28mHcr5ib4UvpYK9fMM6FKWenwlqA3qrK
|
||||
zQUJQ7E/AgMBAAECggEAQ5H/XIxzsSpnv+Y66y9DVd9QGNPwaFthXtCif8rTWNM6
|
||||
YXnGl8JOaPELXpBvljuR0hivqc19pxIVNG01uk5boGDPiygBgRz6WRNQRh1Bc3gN
|
||||
W5mgM17ml2cg+DSVmppo6X1oHeYcT99N1BzT+jRYv1YURx0fr2WHkt413hOlyQMR
|
||||
b8ir/TOBx3olg4KBPDuysRC5BCIr3Mkz4jsh+9wVIOReKVezsy7nxJVzipcxOyZO
|
||||
9VGgvlw4XLrRTOJEv4e3ldcg219j5KEGsJ4FFSziSmpj5fN4Vt+JmY7nueSHyL6P
|
||||
3hX52lRfOcTXTEeiEV2cXkm3h8uQ3zfiZRYi3P0DQQKBgQDXGBZc3WnfXim0u1EV
|
||||
JzZFwxBS7SHkyGgnd50ak6e9yDbdxOuYIOo4mBlc3ofd20EfT4TvR7Xyw+PD2fWJ
|
||||
+isdwCEb9JZZ1H6RDGIzSDYXGNeGId4kMKBZdmKpEeLgStihsrYp/nxtwcE/8A7N
|
||||
jCEKZj1ld7QfbQlGT/NJ4Jj80wKBgQDBfBpth6vMyCETKMJVqYd2qhVnJKiFLfRn
|
||||
OD/Ck6xwUuedbfe9M34wNO3Pn2Xvu1xVsQGb2dmlT345Iq9Z1nbZCGXyY9yfLnTV
|
||||
fz7F2utjUjaJtuiSb52SgX7MWZ8E4nbqqKnC4SYSIlaeuL9KK3r/x6bcNLAYPcdk
|
||||
qKHexDkGZQKBgF0JGyshzhiChzGYUBMBOfVk0Ru9XAq0MHDZyQdk1Io/HpRAB9Nu
|
||||
cUD3cQj9a/EnU/yyDYLeFrIhztO44/7BSYL9xpRr79h9FB2zKIqb8mF9KkPnREmN
|
||||
Ct6HWVdd2C9B0H/oZ+i0HafvxaHdONnpgaUY4feQlkV9iSRzknzi++lnAoGAXOuu
|
||||
/X80oMpUKBFhEyaxqemREdHnJN6nC5NV+6pUHDWUimSvn6vFJH2m4BlbKUC/3V9+
|
||||
uExtXBjLM8FWmTyIIz8HRttyrvfuoEHV8ctrVG29R3ISS5FTCXMrZBR+bCgemB+c
|
||||
N71NPVREaUGsjIBJN+G4XvTmxR2WTt81rfhqsokCgYEA1It9e9Ut2Krzf2FaPGLG
|
||||
ozlKhWadMNntthg3uxze80Rx8WSvgJQdbVpdbni2B/9xdYBIIljW/LGivYBrCSSp
|
||||
aXFpXL7ZGkvl3b0MkojfghIpXVGqu+8ISDtFgL0B1gZ5hq9xMBl94fLVfQgC9Cy6
|
||||
uvDHlz+fjWaWKYUPiouAtVs=
|
||||
-----END PRIVATE KEY-----
|
19
sijapi/config/MS365/.cert.pem
Normal file
19
sijapi/config/MS365/.cert.pem
Normal file
|
@ -0,0 +1,19 @@
|
|||
-----BEGIN CERTIFICATE-----
|
||||
MIIDAzCCAeugAwIBAgIUc+EtilZslnS7N6MAx0u9HeP83wAwDQYJKoZIhvcNAQEL
|
||||
BQAwETEPMA0GA1UEAwwGcHl0aG9uMB4XDTI0MDYwODEyNTcxM1oXDTI1MDYwODEy
|
||||
NTcxM1owETEPMA0GA1UEAwwGcHl0aG9uMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8A
|
||||
MIIBCgKCAQEAopFuu1FtEaZ3CYeB2vR7pmf1s4BGVkVpIJLFSSDifLdT37XLYsy1
|
||||
Jc/sTFjZflaJaeHK8Zz6Z3M4rKTl1OIOuV6BRx6RfZ4Rf92A4MaOQQxDP5iCChDv
|
||||
SxZCAUFQXRPOal5yFaLeTH7jYx3shLO3aETAcYpTkkfw5E/G01ru5ucnh5RefImF
|
||||
TscbLBoT926OwnAHGmQ9SyQZzw3/lAas1kPWrPdN6eWxYsR8tpFc/EJHeZObciEP
|
||||
7V6pYUXyyxxuAGB5gW0tOggINJxKSlHkMeJdSf8pAI0E8IR3+9Mb9c9KdvJh3K+Y
|
||||
m+FL6WCvXzDOhSlnp8JagN6qys0FCUOxPwIDAQABo1MwUTAdBgNVHQ4EFgQUS74L
|
||||
HD4Cdzh1ajatbvSHNQXIVvAwHwYDVR0jBBgwFoAUS74LHD4Cdzh1ajatbvSHNQXI
|
||||
VvAwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAhpwtVubDjsyq
|
||||
/LiTwpXKhjB/eFb6Yse782Iq+9rsiGGhsN88IA25fKgsJ2AIkR/KA7QSle3ds+1Q
|
||||
EY9/vqpWnfBdpvOi7oV7ozBe+t/5JLu1GQBzg+cVa4iLAWYCiqg1d5NDdIcYMfsM
|
||||
Yq2a3eQoP8Xbj3fFMXdNopXARa1d1zHB3ugXIJYinwMlS0EoGXVQVaHhemOh8GwW
|
||||
keRaA6TDTBFsp0Gl4jv/NrisAt4qg+rlqr0mNcQK92vRX65mDWa/cQKwpUH8+Seq
|
||||
Jl717NnsIGcqYWg8SSvVlkbFfxYhwYICXT824MAdSZtpHNCN/TegxsviYnlDyJKj
|
||||
OJzn4fCxnQ==
|
||||
-----END CERTIFICATE-----
|
1
sijapi/config/MS365/.token.txt
Normal file
1
sijapi/config/MS365/.token.txt
Normal file
|
@ -0,0 +1 @@
|
|||
{"token_type": "Bearer", "scope": "Calendars.Read Calendars.ReadWrite User.Read profile openid email", "expires_in": 3962, "ext_expires_in": 3962, "access_token": "eyJ0eXAiOiJKV1QiLCJub25jZSI6IldDeU91YXllN1RFX2FPM0F1alhlYmtvYTdVRHpUR1dVNWt5d3lJeDZ1MGciLCJhbGciOiJSUzI1NiIsIng1dCI6InE3UDFOdnh1R1F3RE4yVGFpTW92alo4YVp3cyIsImtpZCI6InE3UDFOdnh1R1F3RE4yVGFpTW92alo4YVp3cyJ9.eyJhdWQiOiIwMDAwMDAwMy0wMDAwLTAwMDAtYzAwMC0wMDAwMDAwMDAwMDAiLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC9iYWQ3ODA0OC1hNmUwLTQ3YjEtYTI0Yi00MDNjNDQ0YWEzNDkvIiwiaWF0IjoxNzE4Mzc0NzA5LCJuYmYiOjE3MTgzNzQ3MDksImV4cCI6MTcxODM3ODk3MiwiYWNjdCI6MCwiYWNyIjoiMSIsImFpbyI6IkFWUUFxLzhYQUFBQVRnWHJ0Q1pCVjlPa1M2WldldHVVSHNMSFN0LzErYVcxT1BSSjVOWjJEL1Bzd05mY1Fxb0JTNEFZRmhLR3UvaE5TNnNWOGtLQUpmcDNNTzdqRUlNMEZrY1VaZ0IyREh4cWdOK3lUQVBUYnRVPSIsImFtciI6WyJwd2QiLCJtZmEiXSwiYXBwX2Rpc3BsYXluYW1lIjoicHl0aG9uIiwiYXBwaWQiOiJjZThjYmQyNC1mMTQ2LTRkYzctOGVlNy01MWQ5YjY5ZGVjNTkiLCJhcHBpZGFjciI6IjEiLCJmYW1pbHlfbmFtZSI6IkluY2UtSm9oYW5uc2VuIiwiZ2l2ZW5fbmFtZSI6IlNhbmd5ZSIsImlkdHlwIjoidXNlciIsImlwYWRkciI6IjY4LjIzNS40NC4yMDIiLCJuYW1lIjoiU2FuZ3llIEluY2UtSm9oYW5uc2VuIiwib2lkIjoiMWNiMWQwNDAtZmM1OS00MjMxLTllMDUtOWRjNGI0MzJjY2MxIiwicGxhdGYiOiI1IiwicHVpZCI6IjEwMDMyMDAyQTNGQjU4RjIiLCJyaCI6IjAuQVgwQVNJRFh1dUNtc1VlaVMwQThSRXFqU1FNQUFBQUFBQUFBd0FBQUFBQUFBQUMxQUk4LiIsInNjcCI6IkNhbGVuZGFycy5SZWFkIENhbGVuZGFycy5SZWFkV3JpdGUgVXNlci5SZWFkIHByb2ZpbGUgb3BlbmlkIGVtYWlsIiwic2lnbmluX3N0YXRlIjpbImttc2kiXSwic3ViIjoiV0FYVFdIR0puVFhBTjlncmIyamlEU3U4ZENOMmc0dDFacERiVHlwM1k3USIsInRlbmFudF9yZWdpb25fc2NvcGUiOiJOQSIsInRpZCI6ImJhZDc4MDQ4LWE2ZTAtNDdiMS1hMjRiLTQwM2M0NDRhYTM0OSIsInVuaXF1ZV9uYW1lIjoic2FuZ3llaWpAd2VzdGVybmxhdy5vcmciLCJ1cG4iOiJzYW5neWVpakB3ZXN0ZXJubGF3Lm9yZyIsInV0aSI6InFHcVlEODRzaDBHMFBfSEdldlVXQUEiLCJ2ZXIiOiIxLjAiLCJ3aWRzIjpbImI3OWZiZjRkLTNlZjktNDY4OS04MTQzLTc2YjE5NGU4NTUwOSJdLCJ4bXNfaWRyZWwiOiIxIDIiLCJ4bXNfc3QiOnsic3ViIjoieXhjdzFhV1FiM2VrX0FvNFRuRy11SDN6ZndGbVRRUmMxVGpFaEdqZ2p2WSJ9LCJ4bXNfdGNkdCI6MTY0ODY4MTc1Mn0.ssgIrbYo1SPNusoB9bNIB7pLxCmwBKhox__KOnwRRtnE63vbfGWAl53
ww1KpNWPdDfC3p94yuPybTRqjZnTPluv1oJgGINml4AleUnZJnJttRsFHvGflzKOLtXnzmhQGUBXxu7QucKTCMH4J36neeQAWthITMwCHbaGmSy0RLotaIsoEHIufxR9ZEYD4XP5e3sFX54eSnyf4P3GgHHC1y5xxWUlemG4G1BRas8i7oX9o-gqRFube6BMtCLir_HMTNPfrCG-lhd9msLhc6e_WJSmLMHQ7RVLo-GlTMY9UouE190GzBBVKUrTg462I3kP_GayO1kt6qopBrwnF6bDUsw", "refresh_token": "0.AX0ASIDXuuCmsUeiS0A8REqjSSS9jM5G8cdNjudR2bad7Fm1AI8.AgABAwEAAAApTwJmzXqdR4BN2miheQMYAgDs_wUA9P_rTWFRiXWkWxvihyyXonsZPLrulRvnKKRlZ9PxKUltEOQsxjlg86xvCYzAS6dYeDBQiQxRAS_WEuuXVmTqUWDVwqgwQOa3BCbLwxQhPwfG-O9uFY6D239Jo8rdXTrf8XOntGs6fCn3wuo5kvJr2D-FGRA_EepltvRxZgrWdHROKuqoL_ArjLDdoFP7zM95MKhVYTmCO7LCM7u6O9ItU4_6y2_lH864zUivT1LFG8-h9sx0Ln3wd8LBP3P5GSeXwtQlkbNpj1FNDl_Ex5SwGCTM7uDHj0dn5CdUMgLkOcAC__HJdzmlEryTquoXcjd1RAmkq1MqAGD7QQreI7NQTZXwTcjoMwiBg92-bk-_o2ajeIVqzgOVBQIu1W8gkN2F7PAqRc5lGB-2mAXchqKMoL31CLUPxgTMBjWgR4waAjfZXT4h2WqXAAdGFy2nzUJAjyEQa9ZW1J5B6asCf3cVJQwI6nWIN7OphrXkGHl0ffpfrC-skVG3N2vrelAutRvyvWi4bbMqAZNglRrkTn5G_kULmnyydZBcFSc5uPmKD7OkfBD5UpTa_KLTjYexWRVsBfG9czIVxOh3ojnnza9BjrN5cHwHhzPM1t67E5iqronvT2OR_r-4BerUfRNHXrxwrLvDUEZwQ8o5IRs2N5FH0y_QN049o_NTgqytCj6wrIB4T-ZBUK2AsFej7ipdHAMYtWLZdoAo1o4nMuPBb4syN0VYd1sLUP-RQ5iv7wIkMWmNjhjIErIktZ134pGK9TlWa904H6HUin0qNTXyTmX2feE0nBlm6xJbO1ISfFkaf8aEjcAMfeu9qiArKQqUgvY", "expires_at": 1718378971}
|
121
sijapi/config/cf_domains.json
Normal file
121
sijapi/config/cf_domains.json
Normal file
|
@ -0,0 +1,121 @@
|
|||
{
|
||||
"sij.ai": {
|
||||
"zone_id": "9c00a9e0ff540308232eb5762621d5b1",
|
||||
"subdomains": {
|
||||
"www.sij.ai": "8a26b17923ac3a8f21b6127cdb3d7459",
|
||||
"chat.sij.ai": "f2e6a3a25f58dae627c9982efeeff50f",
|
||||
"ab.sij.ai": "458c8b1c4347d3037d83880b628cf1ce",
|
||||
"s3.sij.ai": "b77d74526d244271fc132e728fee4f49",
|
||||
"urls.sij.ai": "6b9525aae570ac4a920ad70cae06987c",
|
||||
"api.sij.ai": "8a336ee8a5b13e112d6d4ae77c149bd6",
|
||||
"dt.sij.ai": "7ab8343763799df690a8584c48a4e6c3",
|
||||
"temp.sij.ai": "fa5190e2818df6362c7488a92227f4de",
|
||||
"dns.sij.ai": "3e7494752833ec17f051ddb02d9e3a66",
|
||||
"ftp.sij.ai": "94a29faa307efee2e0b941fa4ecc5b68",
|
||||
"up.sij.ai": "e6ec3556d53851f09b211b46dc5242f1",
|
||||
"txt.sij.ai": "b4b0bd48ac4272b1c48eb1624072adb2",
|
||||
"ollama.sij.ai": "c589b5a830ac98f6351cdaf45d5fc491",
|
||||
"ai.sij.ai": "77ce76efa702b55f7fcd886e77e0b4d5",
|
||||
"khoj.sij.ai": "e8727aa222b4a866aaf8876c93a55668",
|
||||
"img.sij.ai": "bf17d4eda2db05463a2170ae72fdc13d",
|
||||
"git.sij.ai": "cd524c00b6daf824c933a294cb52eae2"
|
||||
}
|
||||
},
|
||||
"sij.law": {
|
||||
"zone_id": "5b68d9cd99b896e26c232f03cda89d66",
|
||||
"subdomains": {
|
||||
"www.sij.law": "ba9afd99deeb0407ea1b74ba88eb5564",
|
||||
"map.sij.law": "4de8fe05bb0e722ee2c78b2ddf553c82",
|
||||
"watch.sij.law": "4678d8d445ff8c62d01c846e9b90f2b7",
|
||||
"dav.sij.law": "071df85a6198803a3bc56048ce088413",
|
||||
"dt.sij.law": "6d7851639114bc07cd4ad6e85aa049e3",
|
||||
"files.sij.law": "0b23d35ce534f2bda8dfb24c2eef25aa",
|
||||
"hook.sij.law": "f531c5f80a89b473d3605266e02ccd2d",
|
||||
"net.sij.law": "0cfc569acd53d381759eed0b9b6b8ebf",
|
||||
"cloud.sij.law": "2c0e4536d0eae25ec253ca34a8028bc1",
|
||||
"langtool.sij.law": "6bf5d51e1902140c6cca579c0b26f749",
|
||||
"temp.sij.law": "66d8b110a6bd95889afb3139ed4fd499",
|
||||
"archive.sij.law": "c03421e10a344279aa53cc2e2d15296c",
|
||||
"rss.sij.law": "678a3d6c6bd17e4207ec183d0858ed78",
|
||||
"keys.sij.law": "4ebf14f325757cbbcbc02bffdeaaa1cb",
|
||||
"imap.sij.law": "384acd03c139ffaed37f4e70c627e7d1",
|
||||
"smtp.sij.law": "0677e42ea9b589d67d1da21aa00455e0"
|
||||
}
|
||||
},
|
||||
"lone.blue": {
|
||||
"zone_id": "2a86fff4c35118fce68220cfc707077f",
|
||||
"subdomains": {
|
||||
"ai.lone.blue": "51dbf8d11716d838f7dc57fda32e175f",
|
||||
"api.lone.blue": "d4a0a25b688f3871b1e215788dd69a0b",
|
||||
"cloud.lone.blue": "5036ab6d7c1ca9feb2272545afb89b44",
|
||||
"jackett.lone.blue": "a9f4614ea55772b674271c6a94119780",
|
||||
"lone.blue": "35003b908c5870bdd3d69416aa9af6ee",
|
||||
"pi.lone.blue": "cabb41432cef273cbc5eb50d28a152f9",
|
||||
"pod.lone.blue": "c2383b5781ff7972f762eb43af5f8f0f",
|
||||
"router.lone.blue": "4a775be78ccbefe165e5b195c648a8a4",
|
||||
"rss.lone.blue": "4a775be78ccbefe165e5b195c648a8a4",
|
||||
"s3.lone.blue": "3a34ad5507b112cf4e296281796cc5eb",
|
||||
"vault.lone.blue": "b275c72f546f74b9264753d1952df092",
|
||||
"whale.lone.blue": "267d2e23dcf46edef0a1e9bb7a7db9bc",
|
||||
"ab.lone.blue": "85c3155bbd078971c4d2f7cca41ad510",
|
||||
"dns.lone.blue": "e01b1bfa5696452b827fc5220b77fda8",
|
||||
"chat.lone.blue": "c0a141ee64fb8bef5efc9c2556979e99",
|
||||
"vector.lone.blue": "544082d1051a2a184112ef1f8c9ba389",
|
||||
"jump.lone.blue": "67b523a20609337e44a643763cb86e9e",
|
||||
"up.lone.blue": "79821180b99598660cebca6f1c2b0471",
|
||||
"cdb.lone.blue": "8ce4d70a7ec1e1b9d625d890e2d7e9bb",
|
||||
"fap.lone.blue": "6fff3cef2a7331fb718cc240c6217ed8"
|
||||
}
|
||||
},
|
||||
"lone.earth": {
|
||||
"zone_id": "abc8f28cfe88ebdfbf568d9ebf520e99",
|
||||
"subdomains": {
|
||||
"lone.earth": "2cf41011a69dc5ad8f2c9f73e1da51d0"
|
||||
}
|
||||
},
|
||||
"subtle.host": {
|
||||
"zone_id": "3b4bb8899f232b848ec4d1d78d81cb30",
|
||||
"subdomains": {
|
||||
"subtle.host": "3b4bb8899f232b848ec4d1d78d81cb30",
|
||||
"code.subtle.host": "94046b2e9d3e2f1c28f83fbf5e64c1b6",
|
||||
"dl.subtle.host": "465ea87e6a1d0a773b6142979fffccfb",
|
||||
"llm.subtle.host": "59ba6a0d1990992a5539100d22fc6463",
|
||||
"media.subtle.host": "c183b47a598253e66dcbf3250064bffe",
|
||||
"qbt.subtle.host": "a465ac7849e2de8ef17267272178dca0",
|
||||
"sync.subtle.host": "8449a402481913d5068ebf8eebdac079",
|
||||
"vw.subtle.host": "5beb827873ab39467204c9becae3a929",
|
||||
"zabbix.subtle.host": "9db9169f6099f54ee9ae6d4fd988d985",
|
||||
"dns.subtle.host": "9e046ebc14ebcd082b0d87c86d6dd502",
|
||||
"fileserver.subtle.host": "8ade682a91ad04da227aaf5af2fdcad8",
|
||||
"st.subtle.host": "71aad71cfc05f8512366c1e5129baa8a",
|
||||
"fap.subtle.host": "6e10970d8b962cb84b6ee951bf54730a",
|
||||
"home.subtle.host": "78320db057c9a7b87586192203e2cdc1",
|
||||
"jackett.subtle.host": "a4306475d9e8d4257cd7e8b113bf910c"
|
||||
}
|
||||
},
|
||||
"env.esq": {
|
||||
"zone_id": "faf889fd7c227c2e61875b2e70b5c6fe",
|
||||
"subdomains": {
|
||||
"api.env.esq": "b6da6ae8cdd376c1a0742a8b540d53df",
|
||||
"cloud.env.esq": "cd394e73af0af91e4ddba9fe5b5f4db9",
|
||||
"dav.env.esq": "e42852bf0f417b7eca727b87eb235568",
|
||||
"dt.env.esq": "afbc205e829cfb8d3f79dab187c06f99",
|
||||
"env.esq": "b9b636ce9bd4812a6564f572f0f373ee",
|
||||
"minio.env.esq": "86a087ec53a98a06541589ef3720cfea",
|
||||
"n8n.env.esq": "37850b2ba507ddceaab1e00050ae8155",
|
||||
"nas.env.esq": "6ab124507384bb648cc33c06184e758b",
|
||||
"pi.env.esq": "fbdf93acaf7e1a384c4f970e5ffb5a22",
|
||||
"router.env.esq": "f2b9af49ea7b2843e3803bd2f0026aba",
|
||||
"rss.env.esq": "f043d5cf485f4e53f9cbcb85fed2c861",
|
||||
"s3.env.esq": "a5fa431a4be8f50af2c118aed353b0ec",
|
||||
"dns.env.esq": "e10fbba777c90775a87aad47d342a0c1",
|
||||
"sij.env.esq": "9ee66e66a516e21414d871e81f920a27",
|
||||
"monitor.env.esq": "e03a7cd101805dec6b9d44203b31f27a",
|
||||
"kopia.env.esq": "165b065140d314f0a9b34147d4730439",
|
||||
"ftp.env.esq": "dd9dac2ff374f36de2113c291b709e4b",
|
||||
"map.env.esq": "56142c1b040e8f2f05691b75d5b94b16",
|
||||
"hook.env.esq": "6a7c14ef6394d23ee1a3db9de8b831ad",
|
||||
"testing.env.esq": "103ada77c3d8d199ccf2622c63f5172a"
|
||||
}
|
||||
}
|
||||
}
|
98
sijapi/config/config.py
Normal file
98
sijapi/config/config.py
Normal file
|
@ -0,0 +1,98 @@
|
|||
import os
|
||||
import yaml
|
||||
from time import sleep
|
||||
from pathlib import Path
|
||||
import ipaddress
|
||||
|
||||
import yaml
|
||||
|
||||
class Config:
    """Read-only, attribute-style view over a YAML configuration.

    Top-level keys are exposed as attributes. Values that are dicts are
    wrapped in ``ConfigSection`` so nested keys can be reached the same
    way (``cfg.API.PORT``).
    """

    def __init__(self, yaml_file):
        """Load the configuration.

        Args:
            yaml_file: Path of the YAML file to read, or an already-parsed
                dict. Accepting a dict keeps ``Config(parsed_data)`` calls
                working instead of failing inside ``open()``.
        """
        if isinstance(yaml_file, dict):
            self.data = yaml_file
        else:
            with open(yaml_file, 'r') as file:
                # An empty YAML document parses to None; normalise it to an
                # empty dict so lookups raise AttributeError, not TypeError.
                self.data = yaml.safe_load(file) or {}

    def __getattr__(self, name):
        """Return the value stored under ``name``.

        Only invoked when normal attribute lookup fails.

        Raises:
            AttributeError: If ``name`` is not a key of the configuration.
        """
        # Read ``data`` straight from the instance dict: going through
        # ``self.data`` would re-enter __getattr__ and recurse forever when
        # ``data`` has not been set yet (failed __init__, copy, unpickling).
        data = self.__dict__.get('data')
        if data is not None and name in data:
            value = data[name]
            if isinstance(value, dict):
                return ConfigSection(value)
            return value
        raise AttributeError(f"Config has no attribute '{name}'")
|
||||
|
||||
class ConfigSection:
    """Attribute-style wrapper around one nested dict of the configuration.

    Reading an attribute looks the name up in the wrapped dict; assigning an
    attribute (other than ``data`` itself) writes into that dict.
    """

    def __init__(self, data):
        """Wrap ``data``, the dict whose keys become attributes."""
        self.data = data

    def __getattr__(self, name):
        """Return the value stored under ``name``.

        Nested dicts are returned wrapped in a new ``ConfigSection``.

        Raises:
            AttributeError: If ``name`` is not a key of the wrapped dict.
        """
        # Read ``data`` straight from the instance dict: ``self.data`` would
        # re-enter __getattr__ and recurse without end on an instance whose
        # ``data`` is not set yet (e.g. while being copied or unpickled).
        data = self.__dict__.get('data')
        if data is not None and name in data:
            value = data[name]
            if isinstance(value, dict):
                return ConfigSection(value)
            return value
        raise AttributeError(f"ConfigSection has no attribute '{name}'")

    def __setattr__(self, name, value):
        """Store ``value`` in the wrapped dict, except for ``data`` itself."""
        if name == 'data':
            # ``data`` is the only real instance attribute.
            super().__setattr__(name, value)
        else:
            self.data[name] = value
|
||||
|
||||
# Load the YAML configuration file that sits next to this module, so the
# import does not depend on the current working directory.
CFG = Config(Path(__file__).resolve().parent / '.config.yaml')

# Example of attribute-style access: prints the PORT value of the API section.
print(CFG.API.PORT)
|
||||
|
||||
def load_config():
    """Create the runtime directories and load ``.config.yaml``.

    The data and log directories under the package root are created if they
    do not exist, then the YAML file located next to this module is loaded.

    Returns:
        A ``Config`` for the YAML file, or ``None`` if loading failed (the
        error is printed).
    """
    yaml_file = os.path.join(os.path.dirname(__file__), ".config.yaml")

    base_dir = Path(__file__).resolve().parent.parent
    data_dir = base_dir / "data"
    logs_dir = base_dir / "logs"

    # Same directories, in the same order, as before.
    for directory in (
        data_dir,
        data_dir / "alerts",
        logs_dir,
        logs_dir / "requests",
        data_dir / "docs",
        data_dir / "sd" / "images",
    ):
        os.makedirs(directory, exist_ok=True)

    try:
        # Config opens and parses the file itself, so it must receive the
        # path; handing it the parsed dict made open() raise TypeError,
        # which the handler below turned into a permanent ``None`` result.
        return Config(yaml_file)
    except Exception as e:
        # Deliberately broad: callers get None on any loading failure.
        print(f"Error while loading configuration: {e}")
        return None
|
||||
|
||||
def reload_config():
    """Reload the module-level ``config`` every five minutes, forever.

    This function never returns.
    """
    global config
    while True:
        # Config opens and parses the file itself, so pass it the path;
        # handing it the parsed dict made open() raise TypeError on the
        # first iteration.
        # NOTE(review): the other loaders in this module read '.config.yaml'
        # next to the module, while this reads 'config.yaml' from the current
        # working directory - confirm which file is intended.
        config = Config('config.yaml')
        sleep(300)  # reload every 5 minutes
|
151
sijapi/config/llms.json-example
Normal file
151
sijapi/config/llms.json-example
Normal file
|
@ -0,0 +1,151 @@
|
|||
{
|
||||
"Alpaca": {
|
||||
"models": [
|
||||
"mythomax",
|
||||
"openhermes",
|
||||
"deepseek"
|
||||
],
|
||||
"prefix": "\n### Instruction:\n",
|
||||
"stops": [
|
||||
"### Instruction"
|
||||
],
|
||||
"suffix": "\n### Response:\n",
|
||||
"sysPrefix": "### System\n",
|
||||
"sysSuffix": "\n"
|
||||
},
|
||||
"Amazon": {
|
||||
"models": [
|
||||
"mistrallite"
|
||||
],
|
||||
"prefix": "<|prompter|>",
|
||||
"stops": [
|
||||
"<|prompter|>",
|
||||
"</s>"
|
||||
],
|
||||
"suffix": "</s><|assistant|>",
|
||||
"sysPrefix": "",
|
||||
"sysSuffix": ""
|
||||
},
|
||||
"ChatML": {
|
||||
"models": [
|
||||
"dolphin",
|
||||
"capybara",
|
||||
"nous-hermes-2"
|
||||
],
|
||||
"prefix": "<|im_end|>\n<|im_start|>user\n",
|
||||
"stops": [
|
||||
"<|im_end|>",
|
||||
"<|im_start|>"
|
||||
],
|
||||
"suffix": "<|im_end|>\n<|im_start|>assistant\n",
|
||||
"sysPrefix": "<|im_start|>system\n",
|
||||
"sysSuffix": "<|im_end|>"
|
||||
},
|
||||
"Llama2": {
|
||||
"models": [
|
||||
"llama2-placeholder"
|
||||
],
|
||||
"prefix": "\n\n[INST] ",
|
||||
"stops": [
|
||||
"[/INST]",
|
||||
"[INST]"
|
||||
],
|
||||
"suffix": "[/INST]\n\n",
|
||||
"sysPrefix": "",
|
||||
"sysSuffix": "\n\n"
|
||||
},
|
||||
"Mistral": {
|
||||
"models": [
|
||||
"mistral-instruct",
|
||||
"mixtral-8x7b-instruct"
|
||||
],
|
||||
"prefix": "\n[INST] ",
|
||||
"stops": [
|
||||
"[/INST]",
|
||||
"[INST]",
|
||||
"</s>"
|
||||
],
|
||||
"suffix": "[/INST]\n",
|
||||
"sysPrefix": "",
|
||||
"sysSuffix": "\n<s>"
|
||||
},
|
||||
"Orca": {
|
||||
"models": [
|
||||
"upstage",
|
||||
"neural",
|
||||
"solar",
|
||||
"SOLAR"
|
||||
],
|
||||
"prefix": "\n### User:\n",
|
||||
"stops": [
|
||||
"###",
|
||||
"User:"
|
||||
],
|
||||
"suffix": "\n### Assistant:\n",
|
||||
"sysPrefix": "### System:\n",
|
||||
"sysSuffix": "\n"
|
||||
},
|
||||
"Phi2": {
|
||||
"models": [
|
||||
"phi-2"
|
||||
],
|
||||
"prefix": "\nSangye: ",
|
||||
"stops": [
|
||||
"###",
|
||||
"User Message"
|
||||
],
|
||||
"suffix": "\nAssistant: ",
|
||||
"sysPrefix": "System: ",
|
||||
"sysSuffix": "\n"
|
||||
},
|
||||
"Phind": {
|
||||
"models": [
|
||||
"phind"
|
||||
],
|
||||
"prefix": "\n### User Message\n",
|
||||
"stops": [
|
||||
"###",
|
||||
"User Message"
|
||||
],
|
||||
"suffix": "\n### Assistant\n",
|
||||
"sysPrefix": "### System Prompt\n",
|
||||
"sysSuffix": "\n"
|
||||
},
|
||||
"Vicuna": {
|
||||
"models": [
|
||||
"xwin",
|
||||
"synthia",
|
||||
"tess"
|
||||
],
|
||||
"prefix": "\nUSER: ",
|
||||
"stops": [
|
||||
"</s>",
|
||||
"USER:",
|
||||
"SYSTEM:"
|
||||
],
|
||||
"suffix": "</s>\nASSISTANT: ",
|
||||
"sysPrefix": "SYSTEM: ",
|
||||
"sysSuffix": "\n"
|
||||
},
|
||||
"Zephyr": {
|
||||
"models": [
|
||||
"zephyr"
|
||||
],
|
||||
"prefix": " ",
|
||||
"stops": [
|
||||
"</s>"
|
||||
],
|
||||
"suffix": "</s>\n ",
|
||||
"sysPrefix": " ",
|
||||
"sysSuffix": "</s>\n"
|
||||
},
|
||||
"default": {
|
||||
"prefix": "\n### Instruction:\n",
|
||||
"stops": [
|
||||
"### Instruction"
|
||||
],
|
||||
"suffix": "\n### Response:\n",
|
||||
"sysPrefix": "### System\n",
|
||||
"sysSuffix": "\n"
|
||||
}
|
||||
}
|
43
sijapi/config/sd-example.json
Normal file
43
sijapi/config/sd-example.json
Normal file
|
@ -0,0 +1,43 @@
|
|||
{
|
||||
"scenes": [
|
||||
{
|
||||
"scene": "default",
|
||||
"triggers": [""],
|
||||
"API_PPrompt": "(Highly-detailed) image of ",
|
||||
"API_SPrompt": "; ((masterpiece)); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
|
||||
"API_NPrompt": "`oil, paint splash, oil effect, dots, paint, freckles, liquid effect, canvas frame, 3d, bad art, asian, illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, explicit, topless`",
|
||||
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
|
||||
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this scene description to its essence, staying true to what it describes: ",
|
||||
"workflows": [{"workflow": "turbo.json", "size": "1024x768"}]
|
||||
},
|
||||
{
|
||||
"scene": "portrait",
|
||||
"triggers": [
|
||||
"portrait",
|
||||
"profile",
|
||||
"headshot"
|
||||
],
|
||||
"API_PPrompt": "Highly-detailed portrait photo of ",
|
||||
"API_SPrompt": "; attractive, cute, (((masterpiece))); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
|
||||
"API_NPrompt": "canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, nude",
|
||||
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic portrait photos. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided, focusing in particular on the pictured individual's eyes, pose, and other distinctive features. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the rest of the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words. Remember that the final product will be a still image, and action verbs are not as helpful as simple descriptions of position, appearance, background, etc.",
|
||||
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this portrait photo to its essence: ",
|
||||
"workflows": [
|
||||
{
|
||||
"workflow": "selfie.json",
|
||||
"size": "768x1024"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"scene": "wallpaper",
|
||||
"triggers": ["wallpaper"],
|
||||
"API_PPrompt": "Stunning widescreen image of ",
|
||||
"API_SPrompt": ", masterpiece, (subtle:0.7), (nuanced:0.6), best quality, ultra detailed, ultra high resolution, 8k, (documentary:0.3), cinematic, filmic, moody, dynamic lighting, realistic, wallpaper, landscape photography, professional, earthporn, (eliot porter:0.6), (frans lanting:0.4), (daniel kordan:0.6), landscapephotography, ultra detailed, earth tones, moody",
|
||||
"API_NPrompt": "FastNegativeV2, (easynegative:0.5), canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, Photoshop, video game, anime, cartoon, fake, tiling, out of frame, bad art, bad anatomy, 3d render, nsfw, worst quality, low quality, text, watermark, (Thomas Kinkade:0.5), sentimental, kitsch, kitschy, twee, commercial, holiday card, modern, futuristic, urban, comic, cartoon, FastNegativeV2, epiCNegative, easynegative, verybadimagenegative_v1.3",
|
||||
"LLM_SysMsg": "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
|
||||
"LLM_PrePrompt": "Using a series of words or sentence fragments separated by commas, describe a professional landscape photograph of a striking scene of nature. You can select any place on Earth that a young model from the Pacific Northwest is likely to travel to. Focus on describing the content and composition of the image. Only use words and phrases that are visually descriptive. This model is especially fond of wild and rugged places, mountains. She favors dark muted earth tones, dramatic lighting, and interesting juxtapositions between foreground and background, or center of frame and outer frame areas. Avoid cliche situations; instead strive for nuance and originality in composition and environment.",
|
||||
"workflows": [{"workflow": "landscape.json", "size": "1160x768"}]
|
||||
}
|
||||
]
|
||||
}
|
102
sijapi/config/sd.json
Normal file
102
sijapi/config/sd.json
Normal file
|
@ -0,0 +1,102 @@
|
|||
{
|
||||
"scenes": [
|
||||
{
|
||||
"scene": "default",
|
||||
"triggers": [""],
|
||||
"API_PPrompt": "(Highly-detailed) image of ",
|
||||
"API_SPrompt": "; ((masterpiece)); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
|
||||
"API_NPrompt": "`oil, paint splash, oil effect, dots, paint, freckles, liquid effect, canvas frame, 3d, bad art, asian, illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, explicit, topless`",
|
||||
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
|
||||
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this scene description to its essence, staying true to what it describes: ",
|
||||
"workflows": [{"workflow": "turbo.json", "size": "1024x768"}]
|
||||
},
|
||||
{
|
||||
"scene": "landscape",
|
||||
"triggers": ["lanscape", "vista", "scenic", "pastoral", "mountains", "outdoor", "outside", "adventure"],
|
||||
"API_PPrompt": "Moody landscape photograph of ",
|
||||
"API_SPrompt": ", masterpiece, (cinematic:0.5), beautiful lighting, dynamic lighting, (subtle:0.4), (nuanced:0.3), subdued, fine detail, best quality, ultra detailed, ultra high resolution, 8k, (documentary:0.3), cinematic, filmic, moody, dynamic lighting, realistic, wallpaper, landscape photography, professional, pastoral, earthporn, (eliot porter:0.6), (frans lanting:0.4), (daniel kordan:0.6), landscapephotography, ultra detailed, extremely sharp, insane detail, 8k, earth tones, moody",
|
||||
"API_NPrompt": "FastNegativeV2, (easynegative:0.5), canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, Photoshop, video game, anime, cartoon, fake, tiling, out of frame, bad art, bad anatomy, 3d render, nsfw, worst quality, low quality, text, watermark, (Thomas Kinkade:0.5), sentimental, kitsch, kitschy, twee, commercial, holiday card, modern, futuristic, urban, comic, cartoon, FastNegativeV2, epiCNegative, easynegative, verybadimagenegative_v1.3",
|
||||
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
|
||||
"LLM_PrePrompt": "Using a series of words or sentence fragments separated by commas, describe a professional landscape photograph of a striking scene of nature. You can select any place on Earth that a young model from the Pacific Northwest is likely to travel to. Focus on describing the content and composition of the image. Only use words and phrases that are visually descriptive. This model is especially fond of wild and rugged places, mountains. She favors dark muted earth tones, dramatic lighting, and interesting juxtapositions between foreground and background, or center of frame and outer frame areas. Avoid cliche situations; instead strive for nuance and originality in composition and environment.",
|
||||
"workflows": [{"workflow": "landscape.json", "size": "1080x800"}]
|
||||
},
|
||||
{
|
||||
"scene": "wallpaper",
|
||||
"triggers": ["landscape", "vista", "scenic", "pastoral", "mountains", "outdoor", "outside", "adventure"],
|
||||
"API_PPrompt": "",
|
||||
"API_SPrompt": ", masterpiece, cinematic, beautiful lighting, subtle, nuanced, fine detail, best quality, filmic, moody, dynamic lighting, realistic, wallpaper, landscape photography, professional, pastoral, earthporn, landscapephotography, ultra detailed, extremely sharp, insane detail, 8k, earth tones, moody",
|
||||
"API_NPrompt": "FastNegativeV2, easynegative, canvas frame, 3d, bad art, illustrated, deformed, blurry, duplicate, video game, render, anime, cartoon, fake, tiling, out of frame, bad art, bad anatomy, 3d render, nsfw, worst quality, low quality, text, watermark, Thomas Kinkade, sentimental, kitsch, kitschy, twee, commercial, holiday card, comic, cartoon, FastNegativeV2, epiCNegative, easynegative, verybadimagenegative_v1.3",
|
||||
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
|
||||
"LLM_PrePrompt": "Using a series of words or sentence fragments separated by commas, describe a professional landscape photograph of a striking scene of nature. Focus on describing the content and composition of the image. Only use words and phrases that are visually descriptive. Favors dark tones, dramatic lighting, and interesting juxtapositions between foreground and background, or center of frame and outer frame areas. Avoid cliche situations; instead strive for nuance and originality in composition and environment.",
|
||||
"workflows": [{"workflow": "wallpaper.json", "size": "1024x512"}]
|
||||
},
|
||||
{
|
||||
"scene": "portrait",
|
||||
"triggers": [
|
||||
"portrait",
|
||||
"profile",
|
||||
"headshot"
|
||||
],
|
||||
"API_PPrompt": "Highly-detailed portrait photo of ",
|
||||
"API_SPrompt": "; attractive, cute, (((masterpiece))); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
|
||||
"API_NPrompt": "canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, nude",
|
||||
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic portrait photos. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided, focusing in particular on the pictured individual's eyes, pose, and other distinctive features. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the rest of the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words. Remember that the final product will be a still image, and action verbs are not as helpful as simple descriptions of position, appearance, background, etc.",
|
||||
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this portrait photo to its essence: ",
|
||||
"workflows": [
|
||||
{
|
||||
"workflow": "selfie.json",
|
||||
"size": "768x1024"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"scene": "doggystyle",
|
||||
"triggers": [
|
||||
"doggystyle",
|
||||
"doggy-style",
|
||||
"doggy style",
|
||||
"from behind"
|
||||
],
|
||||
"API_PPrompt": "Explicit highly-detailed image of ",
|
||||
"API_SPrompt": "; ((from behind)), (((doggystyle))), explicit, ((tiny breasts)), flat chest, (((young nude girl))), cute, (covered in cum), sex, porn, nsfw, (((masterpiece))); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
|
||||
"API_NPrompt": "canvas frame, 3d, ((bad art)), ((asian)), illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, censored, pg13",
|
||||
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic erotic/pornographic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided, focusing in particular on 18+ details concerning body parts, position, etc. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the rest of the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words. Remember that the final product will be a still image, and action verbs are not as helpful as simple descriptions of position, appearance, body parts and fluids, etc.",
|
||||
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this pornographic scene description of doggystyle sex to its erotic essence, staying true to what it describes no matter how kinky or taboo: ",
|
||||
"workflows": [
|
||||
{
|
||||
"workflow": "xxx/doggy.json",
|
||||
"size": "1024x768",
|
||||
"preset_values": {
|
||||
"node": "133",
|
||||
"type": "depth_map",
|
||||
"key": "image",
|
||||
"values": [
|
||||
"xxx/doggy/DOGGY_001.png",
|
||||
"xxx/doggy/DOGGY_002.png",
|
||||
"xxx/doggy/DOGGY_003.png"
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"scene": "nsfw",
|
||||
"triggers": [
|
||||
"nude",
|
||||
"naked",
|
||||
"undressed"
|
||||
],
|
||||
"API_PPrompt": "Explicit highly-detailed image of ",
|
||||
"API_SPrompt": "; ((tiny breasts)), flat chest, (((young nude girl))), cute, nsfw, (((masterpiece))); ((beautiful lighting), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
|
||||
"API_NPrompt": "canvas frame, 3d, ((bad art)), ((asian)), illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, censored, pg13",
|
||||
"LLM_SysMsg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic erotic art. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided, focusing in particular on details concerning body parts, position, etc. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the rest of the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts.",
|
||||
"LLM_PrePrompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this image of a young girl or woman to its erotic essence: ",
|
||||
"workflows": [
|
||||
{
|
||||
"workflow": "nude.json",
|
||||
"size": "768x1024"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
BIN
sijapi/helpers/calendar/exportCal.scpt
Normal file
BIN
sijapi/helpers/calendar/exportCal.scpt
Normal file
Binary file not shown.
2
sijapi/helpers/calendar/updateCal
Executable file
2
sijapi/helpers/calendar/updateCal
Executable file
|
@ -0,0 +1,2 @@
|
|||
#!/bin/bash
# Run the updateCal AppleScript.
#
# The original hard-coded path is tried first so existing installs behave
# exactly as before. If that file is missing (different user, different
# checkout location), fall back to the updateCal.scpt that sits next to this
# script.

legacy_scpt="/Users/sij/workshop/sijapi/helpers/updateCal.scpt"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

if [[ -f "$legacy_scpt" ]]; then
    scpt="$legacy_scpt"
else
    scpt="${script_dir}/updateCal.scpt"
fi

osascript "$scpt"
|
BIN
sijapi/helpers/calendar/updateCal.scpt
Executable file
BIN
sijapi/helpers/calendar/updateCal.scpt
Executable file
Binary file not shown.
BIN
sijapi/helpers/calendar/updateCal2.scpt
Normal file
BIN
sijapi/helpers/calendar/updateCal2.scpt
Normal file
Binary file not shown.
195
sijapi/helpers/courtlistener/clHooks.py
Normal file
195
sijapi/helpers/courtlistener/clHooks.py
Normal file
|
@ -0,0 +1,195 @@
|
|||
import asyncio
import io
import json
import logging
import os
from datetime import datetime
from pathlib import Path

import aiohttp
import httpx
from fastapi import FastAPI, Request, BackgroundTasks, HTTPException, status
from fastapi.responses import JSONResponse
from PyPDF2 import PdfReader
|
||||
|
||||
# FastAPI application that receives CourtListener webhook callbacks.
hook = FastAPI()


# Folder layout.
# Historic sync location, kept for reference:
# /Users/sij/Library/CloudStorage/OneDrive-WELC/Documents - WELC-Docket
SYNC_FOLDER = Path(__file__).resolve().parent.parent  # two levels above this file
HOME_FOLDER = Path.home()
DOCKETS_FOLDER = HOME_FOLDER / "Dockets"              # docket-alert downloads
SEARCH_FOLDER = HOME_FOLDER / "Watched Cases"         # search-alert downloads
SCRIPTS_FOLDER = SYNC_FOLDER / ".scripts"             # holds caseTable.json
REQUESTS_FOLDER = HOME_FOLDER / "sync" / "requests"   # raw webhook payload archive

COURTLISTENER_BASE_URL = "https://www.courtlistener.com"
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
# SECURITY: the token below is committed to version control and should be
# rotated. Set COURTLISTENER_API_KEY in the environment to override it; the
# literal is kept only as a backward-compatible fallback.
COURTLISTENER_API_KEY = (
    os.getenv("COURTLISTENER_API_KEY")
    or "efb5fe00f3c6c88d65a32541260945befdf53a7e"
)

# Maps a CourtListener docket id (string key) to {"code": ..., "shortname": ...}.
with open(SCRIPTS_FOLDER / 'caseTable.json', 'r', encoding='utf-8') as file:
    CASE_TABLE = json.load(file)
|
||||
|
||||
@hook.get("/health")
async def health():
    """Liveness probe: always reports that the service is up."""
    return dict(status="ok")
|
||||
|
||||
@hook.post("/cl/docket")
async def respond(request: Request, background_tasks: BackgroundTasks):
    """Receive a CourtListener docket-alert webhook.

    The raw payload is archived under REQUESTS_FOLDER and each entry of
    ``payload.results`` is queued for background processing, so the webhook
    is acknowledged immediately.

    Raises:
        HTTPException: 400 when the body is not JSON or lacks
            ``payload.results``.
    """
    # request.client is None when the server provides no peer address.
    client_ip = request.client.host if request.client else "unknown"
    logging.info(f"Received request from IP: {client_ip}")

    try:
        data = await request.json()
        payload = data['payload']
        results = payload['results']
    except (ValueError, KeyError, TypeError) as exc:
        # Malformed input is the sender's error, not a server fault.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Expected a JSON body containing payload.results",
        ) from exc

    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    # The archive folder may not exist on a fresh machine.
    REQUESTS_FOLDER.mkdir(parents=True, exist_ok=True)
    payload_file = REQUESTS_FOLDER / f"{timestamp}-{client_ip}_docket.json"
    with open(payload_file, 'w') as file:
        json.dump(payload, file, indent=2)

    for result in results or []:
        background_tasks.add_task(process_docket, result)
    return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
|
||||
|
||||
async def process_docket(result):
    """Background-task entry point for a single docket-alert result.

    Opens an httpx client for the duration of the task and hands it, together
    with the result, to process_docket_result.
    """
    async with httpx.AsyncClient() as http_client:
        await process_docket_result(result, http_client)
|
||||
|
||||
|
||||
async def process_docket_result(result, session):
    """Download every RECAP document attached to one docket-alert result.

    Args:
        result: One entry of the webhook's ``payload.results`` list (a dict).
        session: Client object supplied by the caller; it is passed through
            to download_file unchanged.

    Files are saved as
    ``DOCKETS_FOLDER/<shortname>/Docket/<code>_<doc#>_<date>_<description>.pdf``.
    The CourtListener docket lookup is informational (it is only logged), so
    a failure there does not stop the downloads.
    """
    docket = str(result.get('docket'))
    case_code, case_shortname = get_case_details(docket)

    # 'date_filed' may be missing or explicitly null; both use the placeholder.
    try:
        date_filed_formatted = datetime.strptime(
            result.get('date_filed') or '', '%Y-%m-%d'
        ).strftime('%Y%m%d')
    except (ValueError, TypeError):
        date_filed_formatted = 'NoDateFiled'

    # Fetching court docket information from the API
    url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
    headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
    court_docket = 'NoCourtDocket'
    case_name = 'NoCaseName'
    try:
        # Use a distinct name so the caller's `session` is not shadowed.
        async with aiohttp.ClientSession() as cl_session:
            async with cl_session.get(url, headers=headers) as response:
                if response.status == 200:
                    logging.info(f"Fetching CourtListener docket information for {docket}...")
                    data = await response.json()
                    matches = data.get('results') or []
                    if matches:
                        core = matches[0].get('docket_number_core') or ''
                        court_docket = f"{core[:2]}-cv-{core[2:]}"  # Formatting the docket number
                        case_name = matches[0].get('case_name')
                        logging.info(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
                    else:
                        logging.info(f"CourtListener returned no docket for id {docket}.")
                else:
                    logging.info("Failed to fetch data from CourtListener API.")
    except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
        logging.error(f"Error fetching CourtListener docket {docket}: {exc}")

    for document in result.get('recap_documents') or []:
        filepath_ia = document.get('filepath_ia')
        filepath_local = document.get('filepath_local')

        if filepath_ia:
            file_url = filepath_ia
            logging.info(f"Found IA file at {file_url}.")
        elif filepath_local:
            # Avoid a double slash when the relative path already starts with '/'.
            file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local.lstrip('/')}"
            logging.info(f"Found local file at {file_url}.")
        else:
            logging.info(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
            continue

        document_number = document.get('document_number', 'NoDocumentNumber')
        description = document.get('description', 'NoDescription')
        if description is None:  # key present but null
            description = 'NoDescription'
        description = str(description).replace(" ", "_").replace("/", "_")
        description = description[:50]  # Truncate description

        file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
        target_path = Path(DOCKETS_FOLDER) / case_shortname / "Docket" / file_name
        target_path.parent.mkdir(parents=True, exist_ok=True)
        await download_file(file_url, target_path, session)
        # download_file logs and returns on failure, so confirm the file exists.
        if target_path.exists():
            logging.info(f"Downloaded {file_name} to {target_path}")
        else:
            logging.warning(f"{file_name} was not saved to {target_path}")
|
||||
|
||||
|
||||
def get_case_details(docket):
    """Return ``(code, shortname)`` for a docket id from CASE_TABLE.

    Dockets that are not in the table map to ``("000", "UNKNOWN")``.
    """
    fallback = {"code": "000", "shortname": "UNKNOWN"}
    entry = CASE_TABLE.get(str(docket), fallback)
    return entry.get("code"), entry.get("shortname")
|
||||
|
||||
|
||||
|
||||
async def download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
    """Download a PDF from ``url`` and save it to ``path``.

    The response is buffered in memory and parsed with PdfReader first, so an
    HTML error page or a corrupt file is never written to disk.

    Args:
        url: Address of the document.
        path: Destination file; parent directories are created as needed.
        session: Optional aiohttp session to reuse. Anything else (None, a
            closed session, another client type) is ignored and a temporary
            session is opened for this call.

    Returns:
        None. Failures are logged, not raised.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
    }

    # Reuse the caller's session only when it is a live aiohttp session.
    owns_session = not isinstance(session, aiohttp.ClientSession) or session.closed
    if owns_session:
        session = aiohttp.ClientSession()

    logging.info(f"Attempting to download {url} to {path}.")
    try:
        async with session.get(url, headers=headers, allow_redirects=True) as response:
            if response.status == 403:
                logging.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
                return
            response.raise_for_status()

            # Compare the media type only: servers may append parameters such
            # as "; charset=binary" and may vary the case.
            content_type = response.headers.get('Content-Type')
            media_type = (content_type or '').split(';', 1)[0].strip().lower()
            if media_type != 'application/pdf':
                logging.error(f"Invalid content type: {content_type}. Skipping download.")
                return

            buffer = io.BytesIO(await response.read())

            # Validate the downloaded PDF content before writing it to disk.
            try:
                PdfReader(buffer)
            except Exception as e:
                logging.error(f"Invalid PDF content: {str(e)}. Skipping download.")
                return

            path.parent.mkdir(parents=True, exist_ok=True)
            with path.open('wb') as file:
                file.write(buffer.getvalue())

    except Exception as e:
        # Best-effort by design: one failed document must not abort the batch.
        logging.error(f"Error downloading file: {str(e)}")
    finally:
        if owns_session:
            await session.close()
|
||||
|
||||
@hook.post("/cl/search")
async def respond_search(request: Request, background_tasks: BackgroundTasks):
    """Receive a CourtListener search-alert webhook.

    The raw payload is archived under REQUESTS_FOLDER and each entry of
    ``payload.results`` is queued for background download.

    Raises:
        HTTPException: 400 when the body is not JSON or lacks
            ``payload.results``.
    """
    # request.client is None when the server provides no peer address.
    client_ip = request.client.host if request.client else "unknown"
    logging.info(f"Received request from IP: {client_ip}")

    try:
        data = await request.json()
        payload = data['payload']
        results = payload['results']
    except (ValueError, KeyError, TypeError) as exc:
        # Malformed input is the sender's error, not a server fault.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Expected a JSON body containing payload.results",
        ) from exc

    # Save the payload data
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    REQUESTS_FOLDER.mkdir(parents=True, exist_ok=True)  # may not exist yet
    payload_file = REQUESTS_FOLDER / f"{timestamp}-{client_ip}_search.json"
    with open(payload_file, 'w') as file:
        json.dump(payload, file, indent=2)

    for result in results or []:
        background_tasks.add_task(process_search_result, result)
    return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
|
||||
|
||||
|
||||
async def process_search_result(result):
    """Download the document referenced by one search-alert result.

    The file is stored at ``SEARCH_FOLDER/<court_id>/<case name>/<file name>``,
    using the short case name when the payload provides one.

    Args:
        result: One entry of the webhook's ``payload.results`` list (a dict).
    """
    download_url = result.get('download_url')
    court_id = result.get('court_id')
    case_name_short = result.get('caseNameShort')
    case_name = result.get('caseName')
    logging.info(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")

    if not download_url:
        logging.error(f"No download_url for case {case_name} ({court_id}); nothing to download.")
        return

    def as_folder(value, fallback):
        # The values come from an external webhook: neutralise path separators
        # and the special names so they cannot escape SEARCH_FOLDER.
        name = str(value or fallback).replace("/", "_").replace("\\", "_").strip()
        return fallback if name in ("", ".", "..") else name

    court_folder = as_folder(court_id, "UnknownCourt")
    case_folder = as_folder(case_name_short or case_name, "UnknownCase")

    # A URL ending in '/' would otherwise give an empty file name.
    file_name = download_url.rstrip('/').split('/')[-1] or "document.pdf"
    target_path = Path(SEARCH_FOLDER) / court_folder / case_folder / file_name
    target_path.parent.mkdir(parents=True, exist_ok=True)

    # download_file opens its own HTTP session when none is supplied.
    await download_file(download_url, target_path)
    if target_path.exists():
        logging.info(f"Downloaded {file_name} to {target_path}")
    else:
        logging.warning(f"{file_name} was not saved to {target_path}")
|
120
sijapi/helpers/courtlistener/downloadEarlier.py
Normal file
120
sijapi/helpers/courtlistener/downloadEarlier.py
Normal file
|
@ -0,0 +1,120 @@
|
|||
import json
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from selenium import webdriver
|
||||
import shutil
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
import os
|
||||
import time
|
||||
import zipfile
|
||||
import os
|
||||
import re
|
||||
import zipfile
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import requests
|
||||
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
# SECURITY: the token below is committed to version control and should be
# rotated. Set COURTLISTENER_API_KEY in the environment to override it; the
# literal is kept only as a backward-compatible fallback.
COURTLISTENER_API_KEY = (
    os.getenv("COURTLISTENER_API_KEY")
    or "efb5fe00f3c6c88d65a32541260945befdf53a7e"
)
SYNC_FOLDER = Path(__file__).resolve().parent.parent  # two levels above this file
DOCKETS_FOLDER = SYNC_FOLDER / "Documents - WELC-Docket"

SELENIUM_HOST = "http://10.13.37.11:4646"
selenium_remote_url = f"{SELENIUM_HOST}/wd/hub"

# NOTE: resolved against the current working directory, so the script must be
# started from the folder that contains caseTable.json.
with open('caseTable.json') as f:
    CASE_TABLE = json.load(f)
|
||||
|
||||
|
||||
def process_docket_result(docket_id):
    """Fetch the Internet Archive bundle for one docket and unpack its PDFs.

    Steps:
      1. Read the case code, short name and court from CASE_TABLE.
      2. Ask CourtListener for the docket's PACER case id.
      3. Download ``gov.uscourts.<court>.<pacer id>.zip`` from archive.org,
         unless ``<code>_Archive.zip`` is already on disk.
      4. Extract every ``*.<entry>.<sub>.pdf`` member as
         ``<code>_<entry>[.<sub>].pdf`` (a sub-number of 0 is dropped).

    Args:
        docket_id: Key into CASE_TABLE (CourtListener docket id).

    Returns:
        None. Every failure is logged and ends processing of this docket.
    """
    timeout = 60  # seconds; without it a stalled connection hangs the batch

    case_info = CASE_TABLE[docket_id]
    case_code = case_info.get("code")
    case_shortname = case_info.get("shortname")
    case_court = case_info.get("court")
    if not (case_code and case_shortname and case_court):
        logging.info(f"Incomplete case table entry for docket {docket_id}; skipping.")
        return

    # Fetching court docket information from the API
    url = f"{COURTLISTENER_DOCKETS_URL}?id={docket_id}"
    headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        logging.info(f"Failed to fetch data from CourtListener API: {exc}")
        return
    if response.status_code != 200:
        logging.info("Failed to fetch data from CourtListener API.")
        return

    logging.info(f"Fetching CourtListener docket information for {docket_id}...")
    results = response.json().get('results') or []
    if not results:
        logging.info(f"CourtListener returned no docket for id {docket_id}.")
        return
    docket_info = results[0]
    core = docket_info.get('docket_number_core') or ''
    court_docket = f"{core[:2]}-cv-{core[2:]}"  # Formatting the docket number
    date_filed = docket_info.get('date_filed')
    pacer_case_id = docket_info.get('pacer_case_id')
    logging.info(f"Obtained from CourtListener: docket {court_docket}, date filed {date_filed}.")
    if not pacer_case_id:
        logging.info(f"Docket {docket_id} has no PACER case id; cannot build archive URL.")
        return

    download_url_to_try = f"https://archive.org/compress/gov.uscourts.{case_court}.{pacer_case_id}/formats=TEXT%20PDF&file=/gov.uscourts.{case_court}.{pacer_case_id}.zip"

    print(f"\n{download_url_to_try}\n")

    directory_path = os.path.join(DOCKETS_FOLDER, case_shortname, "Docket")

    # Create the directory if it doesn't exist
    os.makedirs(directory_path, exist_ok=True)

    target_path = os.path.join(directory_path, case_code + "_Archive.zip")

    if os.path.exists(target_path):
        logging.info(f"Using existing archive: {target_path}")
    else:
        # Stream to a ".part" file and rename only on success, so an
        # interrupted download is not reused as the archive on the next run.
        partial_path = target_path + ".part"
        try:
            with requests.get(download_url_to_try, stream=True, timeout=timeout) as response:
                if response.status_code != 200:
                    logging.info(f"Failed to download archive from {download_url_to_try}")
                    return
                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)
            os.replace(partial_path, target_path)
        except (requests.exceptions.RequestException, OSError) as exc:
            logging.info(f"Failed to download archive from {download_url_to_try}: {exc}")
            if os.path.exists(partial_path):
                os.remove(partial_path)
            return
        logging.info(f"Downloaded archive: {target_path}")
        time.sleep(3)

    # Extract the contents of the ZIP file
    try:
        with zipfile.ZipFile(target_path, 'r') as zip_ref:
            for member in zip_ref.infolist():
                filename = os.path.basename(member.filename)
                if not filename.endswith('.pdf'):
                    continue
                # Extract the docket entry from the filename
                match = re.search(r'(\d+)\.(\d+)\.pdf$', filename)
                if not match:
                    continue
                docket_entry_num, docket_entry_dec = match.groups()
                if docket_entry_dec == '0':
                    docket_entry = docket_entry_num
                else:
                    docket_entry = f"{docket_entry_num}.{docket_entry_dec}"

                # Construct the new filename
                new_filename = f"{case_code}_{docket_entry}.pdf"
                target_file_path = os.path.join(directory_path, new_filename)

                # Extract the file with the new filename
                with open(target_file_path, 'wb') as target_file:
                    with zip_ref.open(member) as source:
                        shutil.copyfileobj(source, target_file)
                logging.info(f"Extracted {filename} as {new_filename}")
    except zipfile.BadZipFile as exc:
        logging.info(f"Archive {target_path} is not a valid ZIP file: {exc}")
|
||||
|
||||
|
||||
def main():
    """Process every docket in CASE_TABLE, pausing three seconds after each."""
    for current_docket in tuple(CASE_TABLE):
        process_docket_result(current_docket)
        time.sleep(3)


if __name__ == '__main__':
    main()
|
153
sijapi/helpers/courtlistener/downloadEarlier2.py
Normal file
153
sijapi/helpers/courtlistener/downloadEarlier2.py
Normal file
|
@ -0,0 +1,153 @@
|
|||
import json
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from selenium import webdriver
|
||||
import shutil
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
import os
|
||||
import time
|
||||
import zipfile
|
||||
import os
|
||||
import re
|
||||
import zipfile
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import requests
|
||||
from PyPDF2 import PdfReader
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load settings from a .env file into the process environment.
load_dotenv()

logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Every value below is None when its environment variable is not set.
COURTLISTENER_DOCKETS_URL = os.environ.get("COURTLISTENER_DOCKETS_URL")
COURTLISTENER_API_KEY = os.environ.get("COURTLISTENER_API_KEY")
SYNC_FOLDER = Path(__file__).resolve().parent.parent
DOCKETS_FOLDER = os.environ.get("DOCKETS_FOLDER")
SELENIUM_HOST = os.environ.get("SELENIUM_HOST")
selenium_remote_url = "{}/wd/hub".format(SELENIUM_HOST)

# The path is relative to the current working directory.
with open('caseTable.json') as f:
    CASE_TABLE = json.load(f)
|
||||
|
||||
def extract_date_from_pdf(pdf_path):
    """Return the date found in a PDF's header, or None.

    Only the first two text lines of page 1 are searched, for an
    ``M-D-YY`` pattern (one- or two-digit month and day, two-digit year).

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        The date as ``YYYYMMDD``, or None when no plausible date is found.
    """
    with open(pdf_path, 'rb') as file:
        reader = PdfReader(file)
        if len(reader.pages) == 0:
            return None
        # extract_text() may yield nothing for image-only pages.
        text = reader.pages[0].extract_text() or ''

    for line in text.split('\n')[:2]:
        match = re.search(r'\b(\d{1,2})-(\d{1,2})-(\d{2})\b', line)
        if not match:
            continue
        month, day, year = match.groups()
        # Two-digit year pivot: 25-99 -> 19xx, 00-24 -> 20xx.
        century = '19' if int(year) > 24 else '20'
        try:
            file_date = datetime(int(century + year), int(month), int(day))
        except ValueError:
            # Digits matched the pattern but are not a real date (e.g. 13-45-20).
            continue
        # Hard-coded plausibility window matching the pivot above.
        if datetime(1924, 5, 1) <= file_date <= datetime(2024, 4, 30):
            return file_date.strftime("%Y%m%d")
    return None
|
||||
|
||||
|
||||
def process_docket_result(docket_id):
    """Fetch the Internet Archive bundle for one docket and unpack its PDFs.

    Steps:
      1. Read the case code, short name and court from CASE_TABLE.
      2. Ask CourtListener for the docket's PACER case id.
      3. Download ``gov.uscourts.<court>.<pacer id>.zip`` from archive.org,
         unless ``<code>_Archive.zip`` is already on disk.
      4. Extract every ``*.<entry>.<sub>.pdf`` member as
         ``<code>_<entry>[.<sub>][_<YYYYMMDD>].pdf``; the date suffix comes
         from extract_date_from_pdf and is omitted when no date is found.

    Args:
        docket_id: Key into CASE_TABLE (CourtListener docket id).

    Returns:
        None. Every failure is logged and ends processing of this docket.
    """
    timeout = 60  # seconds; without it a stalled connection hangs the batch

    if not DOCKETS_FOLDER:
        logging.info("DOCKETS_FOLDER is not configured; cannot store dockets.")
        return

    case_info = CASE_TABLE[docket_id]
    case_code = case_info.get("code")
    case_shortname = case_info.get("shortname")
    case_court = case_info.get("court")
    if not (case_code and case_shortname and case_court):
        logging.info(f"Incomplete case table entry for docket {docket_id}; skipping.")
        return

    # Fetching court docket information from the API
    url = f"{COURTLISTENER_DOCKETS_URL}?id={docket_id}"
    headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        logging.info(f"Failed to fetch data from CourtListener API: {exc}")
        return
    if response.status_code != 200:
        logging.info("Failed to fetch data from CourtListener API.")
        return

    logging.info(f"Fetching CourtListener docket information for {docket_id}...")
    results = response.json().get('results') or []
    if not results:
        logging.info(f"CourtListener returned no docket for id {docket_id}.")
        return
    docket_info = results[0]
    core = docket_info.get('docket_number_core') or ''
    court_docket = f"{core[:2]}-cv-{core[2:]}"  # Formatting the docket number
    date_filed = docket_info.get('date_filed')
    pacer_case_id = docket_info.get('pacer_case_id')
    logging.info(f"Obtained from CourtListener: docket {court_docket}, date filed {date_filed}.")
    if not pacer_case_id:
        logging.info(f"Docket {docket_id} has no PACER case id; cannot build archive URL.")
        return

    download_url_to_try = f"https://archive.org/compress/gov.uscourts.{case_court}.{pacer_case_id}/formats=TEXT%20PDF&file=/gov.uscourts.{case_court}.{pacer_case_id}.zip"

    print(f"\n{download_url_to_try}\n")

    directory_path = os.path.join(DOCKETS_FOLDER, case_shortname, "Docket")

    # Create the directory if it doesn't exist
    os.makedirs(directory_path, exist_ok=True)

    target_path = os.path.join(directory_path, case_code + "_Archive.zip")

    if os.path.exists(target_path):
        logging.info(f"Using existing archive: {target_path}")
    else:
        # Stream to a ".part" file and rename only on success, so an
        # interrupted download is not reused as the archive on the next run.
        partial_path = target_path + ".part"
        try:
            with requests.get(download_url_to_try, stream=True, timeout=timeout) as response:
                if response.status_code != 200:
                    logging.info(f"Failed to download archive from {download_url_to_try}")
                    return
                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)
            os.replace(partial_path, target_path)
        except (requests.exceptions.RequestException, OSError) as exc:
            logging.info(f"Failed to download archive from {download_url_to_try}: {exc}")
            if os.path.exists(partial_path):
                os.remove(partial_path)
            return
        logging.info(f"Downloaded archive: {target_path}")
        time.sleep(3)

    # Extract the contents of the ZIP file
    try:
        with zipfile.ZipFile(target_path, 'r') as zip_ref:
            for member in zip_ref.infolist():
                filename = os.path.basename(member.filename)
                if not filename.endswith('.pdf'):
                    continue
                # Extract the docket entry from the filename
                match = re.search(r'(\d+)\.(\d+)\.pdf$', filename)
                if not match:
                    continue
                docket_entry_num, docket_entry_dec = match.groups()
                if docket_entry_dec == '0':
                    docket_entry = docket_entry_num
                else:
                    docket_entry = f"{docket_entry_num}.{docket_entry_dec}"

                # Unpack once to a temp file, read the date from it, then move
                # it to its final name instead of extracting a second time.
                temp_file_path = os.path.join(directory_path, 'temp.pdf')
                try:
                    with zip_ref.open(member) as source, open(temp_file_path, 'wb') as temp_file:
                        shutil.copyfileobj(source, temp_file)

                    try:
                        pdf_date = extract_date_from_pdf(temp_file_path)
                    except Exception:
                        # An unreadable PDF is still kept, just without a date.
                        logging.exception(f"Could not read a date from {filename}")
                        pdf_date = None

                    # Construct the new filename
                    if pdf_date:
                        new_filename = f"{case_code}_{docket_entry}_{pdf_date}.pdf"
                    else:
                        new_filename = f"{case_code}_{docket_entry}.pdf"
                    target_file_path = os.path.join(directory_path, new_filename)
                    os.replace(temp_file_path, target_file_path)
                finally:
                    # Only still present when something above failed.
                    if os.path.exists(temp_file_path):
                        os.remove(temp_file_path)
                logging.info(f"Extracted {filename} as {new_filename}")
    except zipfile.BadZipFile as exc:
        logging.info(f"Archive {target_path} is not a valid ZIP file: {exc}")
|
||||
|
||||
|
||||
def main():
    """Run process_docket_result for every docket ID in CASE_TABLE.

    The script sleeps for three seconds after each docket before moving on.
    """
    # Snapshot the keys first so the loop iterates over a fixed list.
    for docket_id in list(CASE_TABLE.keys()):
        process_docket_result(docket_id)
        time.sleep(3)


if __name__ == '__main__':
    main()
|
32
sijapi/helpers/courtlistener/subscribeAlerts.py
Normal file
32
sijapi/helpers/courtlistener/subscribeAlerts.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
"""Create a CourtListener docket alert for every docket ID found in caseTable.json.

The API token is taken from the COURTLISTENER_API_TOKEN environment variable.
"""
import json
import os
import sys

import requests

# Load the caseTable.json file; its top-level keys are used as docket IDs
with open('caseTable.json', 'r') as file:
    case_table = json.load(file)

# Set the base URL and authorization token
base_url = "https://www.courtlistener.com/api/rest/v3/docket-alerts/"
# The credential is read from the environment so it never has to be committed
# together with the code.
auth_token = os.environ.get("COURTLISTENER_API_TOKEN")
if not auth_token:
    sys.exit("COURTLISTENER_API_TOKEN is not set; cannot authenticate to CourtListener.")

# The same headers are sent with every request, so build them once
headers = {'Authorization': f'Token {auth_token}'}

# Iterate through each key (docket ID) in the case table
for docket_id in case_table:
    data = {'docket': docket_id}

    try:
        # Send the POST request to the CourtListener API; the timeout keeps a
        # stalled connection from hanging the whole run.
        response = requests.post(base_url, data=data, headers=headers, timeout=30)

        # A create request is normally acknowledged with 201 Created, so accept
        # it alongside 200 instead of reporting it as a failure.
        if response.status_code in (200, 201):
            print(f"Successfully created docket alert for docket ID: {docket_id}")
        else:
            print(f"Failed to create docket alert for docket ID: {docket_id}")
            print(f"Status code: {response.status_code}")
            print(f"Response content: {response.content}")

    except requests.exceptions.RequestException as e:
        print(f"Error occurred while creating docket alert for docket ID: {docket_id}")
        print(f"Error message: {str(e)}")
|
146
sijapi/helpers/database/dbrestore.sh
Executable file
146
sijapi/helpers/database/dbrestore.sh
Executable file
|
@ -0,0 +1,146 @@
|
|||
#!/bin/bash
#
# Rebuild the weather tables of the SQLite database in three steps:
#   1. dump the four tables to CSV files in the current directory,
#   2. drop and recreate the tables with the schema below,
#   3. re-import the CSV dumps.
#
# The script stops at the first failing step, so the tables are never dropped
# unless the backup step succeeded.
set -euo pipefail

DB_NAME="weatherlocate.db"

# Step 1: Backup existing data
echo "Backing up existing data..."
sqlite3 -bail "$DB_NAME" <<EOF
.headers on
.mode csv
.output hourly_weather_backup.csv
SELECT * FROM HourlyWeather;
.output daily_weather_backup.csv
SELECT * FROM DailyWeather;
.output hours_backup.csv
SELECT * FROM Hours;
.output days_backup.csv
SELECT * FROM Days;
EOF

# Step 2: Drop and recreate tables
# Everything runs in one transaction so a failing statement leaves the old
# tables in place.
echo "Dropping and recreating tables..."
sqlite3 -bail "$DB_NAME" <<EOF
BEGIN;

DROP TABLE IF EXISTS HourlyWeather;
DROP TABLE IF EXISTS DailyWeather;
DROP TABLE IF EXISTS Hours;
DROP TABLE IF EXISTS Days;

CREATE TABLE HourlyWeather (
    id INTEGER PRIMARY KEY,
    datetime TEXT NOT NULL,
    temp REAL,
    feelslike REAL,
    humidity REAL,
    dew REAL,
    precip REAL,
    precipprob REAL,
    snow REAL,
    snowdepth REAL,
    windgust REAL,
    windspeed REAL,
    winddir REAL,
    pressure REAL,
    cloudcover REAL,
    visibility REAL,
    solarradiation REAL,
    solarenergy REAL,
    uvindex REAL,
    severerisk REAL,
    conditions TEXT,
    icon TEXT,
    last_updated TEXT DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE DailyWeather (
    id INTEGER PRIMARY KEY,
    sunrise_time TEXT,
    sunset_time TEXT,
    description TEXT,
    tempmax REAL,
    tempmin REAL,
    uvindex REAL,
    winddir REAL,
    windspeedmean REAL,
    windspeed REAL,
    icon TEXT,
    last_updated TEXT DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE Hours (
    id INTEGER PRIMARY KEY,
    day_id INTEGER,
    hour INTEGER,
    hourly_weather_id INTEGER,
    FOREIGN KEY (day_id) REFERENCES Days(id),
    FOREIGN KEY (hourly_weather_id) REFERENCES HourlyWeather(id)
);

CREATE TABLE Days (
    id INTEGER PRIMARY KEY,
    date TEXT NOT NULL,
    daily_weather_id INTEGER,
    FOREIGN KEY (daily_weather_id) REFERENCES DailyWeather(id)
);

COMMIT;
EOF

# Step 3: Import data from backup files
echo "Importing data from backup files..."

# The heredoc is quoted so the shell does not touch the Python source; the
# database name is handed over through the environment instead.
DB_NAME="$DB_NAME" python3 - <<'EOF'
import csv
import os
import sqlite3
from datetime import datetime

TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

# (backup file, table, columns to restore besides id), in import order
IMPORTS = [
    ('hourly_weather_backup.csv', 'HourlyWeather', [
        'datetime', 'temp', 'feelslike', 'humidity', 'dew', 'precip',
        'precipprob', 'snow', 'snowdepth', 'windgust', 'windspeed', 'winddir',
        'pressure', 'cloudcover', 'visibility', 'solarradiation', 'solarenergy',
        'uvindex', 'severerisk', 'conditions', 'icon', 'last_updated',
    ]),
    ('daily_weather_backup.csv', 'DailyWeather', [
        'sunrise_time', 'sunset_time', 'description', 'tempmax', 'tempmin',
        'uvindex', 'winddir', 'windspeedmean', 'windspeed', 'icon',
        'last_updated',
    ]),
    ('hours_backup.csv', 'Hours', ['day_id', 'hour', 'hourly_weather_id']),
    ('days_backup.csv', 'Days', ['date', 'daily_weather_id']),
]


def convert(column, value):
    """Validate and normalise last_updated; pass every other value through."""
    if column == 'last_updated':
        return datetime.strptime(value, TIMESTAMP_FORMAT).strftime(TIMESTAMP_FORMAT)
    return value


def import_table(cursor, csv_path, table, columns):
    """Insert every row of csv_path into table, keeping the original id."""
    names = ['id'] + columns
    placeholders = ', '.join('?' * len(names))
    sql = f"INSERT INTO {table} ({', '.join(names)}) VALUES ({placeholders})"
    with open(csv_path, 'r', newline='') as file:
        for row in csv.DictReader(file):
            # Re-use the dumped id so the *_id columns of Hours and Days still
            # point at the same rows; a missing/empty id becomes NULL, which
            # lets SQLite assign one.
            values = [row.get('id') or None]
            values.extend(convert(name, row[name]) for name in columns)
            cursor.execute(sql, values)


def import_data():
    conn = sqlite3.connect(os.environ['DB_NAME'])
    try:
        cursor = conn.cursor()
        for csv_path, table, columns in IMPORTS:
            import_table(cursor, csv_path, table, columns)
        conn.commit()
    finally:
        conn.close()


import_data()
EOF

echo "Database rebuild complete."
|
123
sijapi/helpers/database/mergedb.py
Normal file
123
sijapi/helpers/database/mergedb.py
Normal file
|
@ -0,0 +1,123 @@
|
|||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
# Home directory of the user running the script
home_dir = Path.home()

# Path of the SQLite database file: ~/sync/sijapi/data/weatherlocate.db
DB = home_dir / "sync" / "sijapi" / "data" / "weatherlocate.db"
|
||||
|
||||
def create_database():
    """Create the Locations, Days, DailyWeather and HourlyWeather tables in DB.

    Every statement uses CREATE TABLE IF NOT EXISTS, so running the function
    against a database that already has these tables leaves them untouched.
    """
    from contextlib import closing

    # sqlite3's own `with conn:` only manages the transaction and leaves the
    # connection open; closing() guarantees the connection is released.
    with closing(sqlite3.connect(DB)) as conn:
        cursor = conn.cursor()

        # Create the Locations table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS Locations (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                street TEXT,
                city TEXT,
                state TEXT,
                country TEXT,
                latitude REAL,
                longitude REAL,
                zip TEXT,
                elevation REAL,
                last_updated DATETIME
            );
        ''')

        # Create the Days table with a direct reference to DailyWeather
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS Days (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date DATE UNIQUE NOT NULL,
                daily_weather_id INTEGER,
                general_location_id INTEGER,
                FOREIGN KEY(daily_weather_id) REFERENCES DailyWeather(id),
                FOREIGN KEY(general_location_id) REFERENCES Locations(id)
            );
        ''')

        # Create the DailyWeather table with fields adjusted for direct CSV storage of preciptype
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS DailyWeather (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                sunrise TEXT,
                sunriseEpoch TEXT,
                sunset TEXT,
                sunsetEpoch TEXT,
                description TEXT,
                tempmax REAL,
                tempmin REAL,
                uvindex INTEGER,
                winddir REAL,
                windspeed REAL,
                icon TEXT,
                last_updated DATETIME,
                datetime TEXT,
                datetimeEpoch INTEGER,
                temp REAL,
                feelslikemax REAL,
                feelslikemin REAL,
                feelslike REAL,
                dew REAL,
                humidity REAL,
                precip REAL,
                precipprob REAL,
                precipcover REAL,
                preciptype TEXT,
                snow REAL,
                snowdepth REAL,
                windgust REAL,
                pressure REAL,
                cloudcover REAL,
                visibility REAL,
                solarradiation REAL,
                solarenergy REAL,
                severerisk REAL,
                moonphase REAL,
                conditions TEXT,
                stations TEXT,
                source TEXT
            );
        ''')

        # Create the HourlyWeather table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS HourlyWeather (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                day_id INTEGER,
                datetime TEXT,
                datetimeEpoch INTEGER,
                temp REAL,
                feelslike REAL,
                humidity REAL,
                dew REAL,
                precip REAL,
                precipprob REAL,
                snow REAL,
                snowdepth REAL,
                preciptype TEXT,
                windgust REAL,
                windspeed REAL,
                winddir REAL,
                pressure REAL,
                cloudcover REAL,
                visibility REAL,
                solarradiation REAL,
                solarenergy REAL,
                uvindex REAL,
                severerisk REAL,
                conditions TEXT,
                icon TEXT,
                stations TEXT,
                source TEXT,
                FOREIGN KEY(day_id) REFERENCES Days(id)
            );
        ''')

        conn.commit()


if __name__ == "__main__":
    create_database()
|
30
sijapi/helpers/database/mergedbtests.py
Normal file
30
sijapi/helpers/database/mergedbtests.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
from fastapi import FastAPI, HTTPException
|
||||
from typing import List
|
||||
import sqlite3
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
def get_db_connection():
    """Open tracking.db and return a connection whose rows are sqlite3.Row objects."""
    connection = sqlite3.connect('tracking.db')
    # sqlite3.Row allows access by column name: row['column_name']
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
@app.get("/location/{date}", response_model=List[str])
async def read_location_zip(date: str):
    """Return the zip codes recorded for the given date.

    Args:
        date: Value compared against Days.date in the query.

    Returns:
        One zip string per row returned by the Hours/Days/Locations join.

    Raises:
        HTTPException: 404 when the query returns no rows.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        print(f"Querying for date: {date}")  # Debugging output
        cursor.execute('''
            SELECT L.zip FROM Hours H
            JOIN Days D ON H.day_id = D.id
            JOIN Locations L ON H.location_id = L.id
            WHERE D.date = ?
        ''', (date,))
        rows = cursor.fetchall()
        print(f"Found zip codes: {rows}")  # Debugging output
    finally:
        # Release the connection even when the query itself raises.
        conn.close()
    if not rows:
        raise HTTPException(status_code=404, detail="No location data found for this date")
    # Plain loop name so the builtin zip() is not shadowed.
    return [row[0] for row in rows]
|
||||
|
||||
|
89
sijapi/helpers/database/osm_geocode_upload.py
Normal file
89
sijapi/helpers/database/osm_geocode_upload.py
Normal file
|
@ -0,0 +1,89 @@
|
|||
import osmium
|
||||
import psycopg2
|
||||
import json
|
||||
|
||||
from sijapi import DB_USER, DB_PASS, DB_HOST, DB, DATA_DIR
|
||||
|
||||
OSM_DATA_PATH = DATA_DIR / "north-america-latest.osm.pbf"
|
||||
|
||||
class OSMHandler(osmium.SimpleHandler):
    """osmium handler that writes every node, way and relation to PostgreSQL.

    Each callback inserts one row into the nodes, ways or relations table and
    commits immediately.
    """

    def __init__(self, conn):
        """Store the database connection used by the callbacks."""
        osmium.SimpleHandler.__init__(self)
        self.conn = conn

    def _insert(self, sql, params):
        """Run one INSERT on a short-lived cursor and commit it."""
        # The cursor is closed when the with-block exits, so cursors do not
        # pile up over a long import.
        with self.conn.cursor() as cur:
            cur.execute(sql, params)
        self.conn.commit()

    def node(self, n):
        """Insert a node with its point location and its tags as JSON."""
        tags = {tag.k: tag.v for tag in n.tags}
        self._insert(
            """
            INSERT INTO nodes (id, location, tags)
            VALUES (%s, ST_SetSRID(ST_MAKEPOINT(%s, %s),4326), %s)
            """,
            (n.id, n.location.lon, n.location.lat, json.dumps(tags)),
        )

    def way(self, w):
        """Insert a way with the (lon, lat) of each member node and its tags."""
        # NOTE(review): reading node.lon/node.lat presumably requires the file
        # to be applied with node locations enabled - confirm in the caller.
        nodes = [(node.lon, node.lat) for node in w.nodes]
        tags = {tag.k: tag.v for tag in w.tags}
        self._insert(
            """
            INSERT INTO ways (id, nodes, tags)
            VALUES (%s, %s, %s)
            """,
            (w.id, json.dumps(nodes), json.dumps(tags)),
        )

    def relation(self, r):
        """Insert a relation with its members (type, ref, role) and its tags."""
        members = [{"type": m.type, "ref": m.ref, "role": m.role} for m in r.members]
        tags = {tag.k: tag.v for tag in r.tags}
        self._insert(
            """
            INSERT INTO relations (id, members, tags)
            VALUES (%s, %s, %s)
            """,
            (r.id, json.dumps(members), json.dumps(tags)),
        )
|
||||
|
||||
def main():
    """Recreate the nodes, ways and relations tables and load OSM_DATA_PATH into them.

    Any existing nodes, ways and relations tables are dropped first.
    """
    conn = psycopg2.connect(user=DB_USER, password=DB_PASS, dbname=DB, host=DB_HOST)
    try:
        cur = conn.cursor()

        # Drop existing tables if they exist
        cur.execute("DROP TABLE IF EXISTS nodes")
        cur.execute("DROP TABLE IF EXISTS ways")
        cur.execute("DROP TABLE IF EXISTS relations")

        # Create tables for nodes, ways, and relations
        cur.execute("""
            CREATE TABLE nodes (
                id bigint PRIMARY KEY,
                location geography(POINT, 4326),
                tags jsonb
            )
        """)

        cur.execute("""
            CREATE TABLE ways (
                id bigint PRIMARY KEY,
                nodes jsonb,
                tags jsonb
            )
        """)

        cur.execute("""
            CREATE TABLE relations (
                id bigint PRIMARY KEY,
                members jsonb,
                tags jsonb
            )
        """)

        conn.commit()
        cur.close()

        handler = OSMHandler(conn)
        # locations=True makes osmium attach coordinates to the node references
        # of each way; OSMHandler.way() reads node.lon / node.lat from them.
        handler.apply_file(str(OSM_DATA_PATH), locations=True)
    finally:
        # Close the connection even when table setup or the import fails.
        conn.close()


if __name__ == "__main__":
    main()
|
95
sijapi/helpers/database/zip-init.py
Executable file
95
sijapi/helpers/database/zip-init.py
Executable file
|
@ -0,0 +1,95 @@
|
|||
import sqlite3
|
||||
import csv
|
||||
import logging
|
||||
|
||||
import sqlite3
|
||||
|
||||
def create_geonames_table(conn):
    """Create the geonames table on conn if it does not exist yet, then commit.

    The table holds the columns read by geocode_location (zip, latitude,
    longitude) plus city, state, country_code, region, altitude and street.
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS geonames (
            zip TEXT,
            city TEXT,
            state TEXT,
            country_code TEXT,
            latitude TEXT,
            longitude TEXT,
            region TEXT DEFAULT NULL,
            altitude TEXT DEFAULT NULL,
            street TEXT DEFAULT NULL
        );
    """
    conn.cursor().execute(ddl)
    conn.commit()
|
||||
|
||||
|
||||
# Data importation
|
||||
def import_geonames_data(conn, file_path):
    """Load rows from a CSV file into the geonames table and commit.

    The first line is treated as a header and skipped. Rows with fewer than
    seven fields are skipped with a warning. A row that the database rejects is
    logged and the import continues with the next row.

    Args:
        conn: Open sqlite3 connection that already has the geonames table.
        file_path: Path of the UTF-8 encoded CSV file.
    """
    cursor = conn.cursor()
    # newline='' lets the csv module do its own line-ending handling.
    with open(file_path, 'r', encoding='utf-8', newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # Skip the header row; an empty file simply imports nothing
        for row in reader:
            # NOTE(review): only the first six fields are stored although seven
            # are required - confirm against the layout of the CSV.
            if len(row) < 7:  # Ensuring there are enough columns
                logging.warning("Skipped a line due to insufficient data.")
                continue
            try:
                cursor.execute("""
                    INSERT INTO geonames (zip, latitude, longitude, city, state, region, country_code, altitude, street)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);
                """, (row[0], row[1], row[2], row[3], row[4], row[5], 'US', None, None))
            except sqlite3.DatabaseError as e:
                logging.error(f"Database error: {e}")
    conn.commit()
|
||||
|
||||
# Geocode location based on ZIP
|
||||
def geocode_location(conn, zip_code):
    """Look up the coordinates stored for a ZIP code.

    Returns the (latitude, longitude) row of the first match in geonames, or
    (None, None) when the ZIP code is not present.
    """
    match = conn.cursor().execute(
        """SELECT latitude, longitude FROM geonames WHERE zip = ?;""",
        (zip_code,),
    ).fetchone()
    if not match:
        return None, None
    return match
|
||||
|
||||
# Test function to validate database setup and query
|
||||
def test_geocode_function(conn):
    """Look up ZIP 97401 and print whether coordinates came back."""
    # This tests a known ZIP code; replace '97401' with a ZIP code from your data
    lat, lon = geocode_location(conn, '97401')
    if not (lat and lon):
        print("Test failed. No data returned.")
        return
    print(f"Test passed! Latitude: {lat}, Longitude: {lon}")
|
||||
|
||||
# Example test within the same script or a separate test script
|
||||
def test_geocode_function2(conn):
    """Insert a fixed sample row for ZIP 99999, commit, and print its lookup result."""
    # Sample row inserted purely for this check
    sample_insert = "INSERT INTO geonames (zip, city, state, country_code, latitude, longitude) VALUES ('99999', 'Testville', 'TestState', 'US', '45.0', '-93.0')"
    conn.execute(sample_insert)
    conn.commit()

    # Read the row back through the regular lookup function
    print(geocode_location(conn, zip_code='99999'))
|
||||
|
||||
# Assuming you call this test function from your main or setup
|
||||
|
||||
|
||||
# Main execution function
|
||||
def main():
    """Build geonames.db from US.csv and run both lookup checks against it."""
    logging.basicConfig(level=logging.INFO)

    conn = sqlite3.connect('geonames.db')
    create_geonames_table(conn)
    import_geonames_data(conn, 'US.csv')

    # Run the checks in the same order as before
    for check in (test_geocode_function, test_geocode_function2):
        check(conn)

    conn.close()


if __name__ == "__main__":
    main()
|
76
sijapi/helpers/embeddings/embed.py
Normal file
76
sijapi/helpers/embeddings/embed.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
from vectordb import Memory
|
||||
|
||||
# Build the vector store used below: chunks are produced with a sliding window
# of size 128 and overlap 16, and embedded with the 'TaylorAI/bge-micro-v2' model.
# NOTE(review): memory_file presumably names the file the store is persisted to - confirm in the vectordb docs.
memory = Memory(memory_file="embedding.pt",
                chunking_strategy={"mode": "sliding_window", "window_size": 128, "overlap": 16}, embeddings='TaylorAI/bge-micro-v2'
                )
|
||||
|
||||
texts = [
|
||||
"""
|
||||
Machine learning is a method of data analysis that automates analytical model building.
|
||||
|
||||
It is a branch of artificial intelligence based on the idea that systems can learn from data,
|
||||
identify patterns and make decisions with minimal human intervention.
|
||||
|
||||
Machine learning algorithms are trained on data sets that contain examples of the desired output. For example, a machine learning algorithm that is used to classify images might be trained on a data set that contains images of cats and dogs.
|
||||
Once an algorithm is trained, it can be used to make predictions on new data. For example, the machine learning algorithm that is used to classify images could be used to predict whether a new image contains a cat or a dog.
|
||||
|
||||
Machine learning algorithms can be used to solve a wide variety of problems. Some common applications of machine learning include:
|
||||
|
||||
Classification: Categorizing data into different groups. For example, a machine learning algorithm could be used to classify emails as spam or not spam.
|
||||
|
||||
Regression: Predicting a continuous value. For example, a machine learning algorithm could be used to predict the price of a house.
|
||||
|
||||
Clustering: Finding groups of similar data points. For example, a machine learning algorithm could be used to find groups of customers with similar buying habits.
|
||||
|
||||
Anomaly detection: Finding data points that are different from the rest of the data. For example, a machine learning algorithm could be used to find fraudulent credit card transactions.
|
||||
|
||||
Machine learning is a powerful tool that can be used to solve a wide variety of problems. As the amount of data available continues to grow, machine learning is likely to become even more important in the future.
|
||||
""",
|
||||
"""
|
||||
Artificial intelligence (AI) is the simulation of human intelligence in machines
|
||||
that are programmed to think like humans and mimic their actions.
|
||||
|
||||
The term may also be applied to any machine that exhibits traits associated with
|
||||
a human mind such as learning and problem-solving.
|
||||
|
||||
AI research has been highly successful in developing effective techniques for solving a wide range of problems, from game playing to medical diagnosis.
|
||||
|
||||
However, there is still a long way to go before AI can truly match the intelligence of humans. One of the main challenges is that human intelligence is incredibly complex and poorly understood.
|
||||
|
||||
Despite the challenges, AI is a rapidly growing field with the potential to revolutionize many aspects of our lives. Some of the potential benefits of AI include:
|
||||
|
||||
Increased productivity: AI can be used to automate tasks that are currently performed by humans, freeing up our time for more creative and fulfilling activities.
|
||||
|
||||
Improved decision-making: AI can be used to make more informed decisions, based on a wider range of data than humans can typically access.
|
||||
|
||||
Enhanced creativity: AI can be used to generate new ideas and solutions, beyond what humans can imagine on their own.
|
||||
Of course, there are also potential risks associated with AI, such as:
|
||||
|
||||
Job displacement: As AI becomes more capable, it is possible that it will displace some human workers.
|
||||
|
||||
Weaponization: AI could be used to develop new weapons that are more powerful and destructive than anything we have today.
|
||||
|
||||
Loss of control: If AI becomes too powerful, we may lose control over it, with potentially disastrous consequences.
|
||||
|
||||
It is important to weigh the potential benefits and risks of AI carefully as we continue to develop this technology. With careful planning and oversight, AI has the potential to make the world a better place. However, if we are not careful, it could also lead to serious problems.
|
||||
""",
|
||||
]
|
||||
|
||||
metadata_list = [
|
||||
{
|
||||
"title": "Introduction to Machine Learning",
|
||||
"url": "https://example.com/introduction-to-machine-learning",
|
||||
},
|
||||
{
|
||||
"title": "Introduction to Artificial Intelligence",
|
||||
"url": "https://example.com/introduction-to-artificial-intelligence",
|
||||
},
|
||||
]
|
||||
|
||||
# Store both sample documents together with their title/url metadata
memory.save(texts, metadata_list)

# Ask one question against the stored texts and print whatever comes back
query = "What is the relationship between AI and machine learning?"
results = memory.search(query, top_n=3, unique=True)
print(results)

# top_n=3 is requested, but per the original author's note only two results are
# returned because unique=True
|
15
sijapi/helpers/embeddings/embeddings.py
Normal file
15
sijapi/helpers/embeddings/embeddings.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
from vectordb import Memory
|
||||
|
||||
# Everything that should be stored and searched lives in a Memory instance.
memory = Memory()

# Text content to save (long text is chunked automatically) and, optionally,
# one metadata dict per text.
documents = ["apples are green", "oranges are orange"]
metadata = [{"url": "https://apples.com"}, {"url": "https://oranges.com"}]
memory.save(documents, metadata)

# Embedding-based search for the single most relevant result
query = "green"
results = memory.search(query, top_n=1)

print(results)
|
46
sijapi/helpers/mobile/widget.shell
Executable file
46
sijapi/helpers/mobile/widget.shell
Executable file
|
@ -0,0 +1,46 @@
|
|||
# Updates watch complications for Secure ShellFish
#
# This command sends encrypted data through push notifications such
# that it doesn't need to run from a Secure ShellFish terminal.
#
# NOTE(review): as committed, the here-document opened by `cat <<EOF` below is
# never terminated and the `if` is never closed, because the `EOF`, `return 0`
# and `fi` lines (and everything after them) are commented out. Confirm whether
# the file is meant to be disabled or should be restored to a working function.
# NOTE(review): the commented-out key/user/iv values look like credentials -
# consider rotating them and keeping them out of version control.

if [[ $# -eq 0 ]]; then
cat <<EOF
# Usage: widget [target] <data> ...

# Update complication on device from which this function was installed with a number of content parameters that can be string, progress, icon, target or color.

# Each argument type is derived from input.

# Progress has the form: 50% or 110/220

# Icon must match valid SF Symbol name such as globe or terminal.fill

# Colors must be hex colours such as #000 #ff00ff where the color is used for later content and 'foreground' switches back to default colour

# Target is used to send different content to different complications after configuring the complications with different target identifiers which requires the pro unlock. The target parameter is never assumed unless --target is used and is effective until next --target parameter allowing updates of several complications with a single command

# You can configure complications to only show content for a given target.

# String is the fallback type if nothing else matches, but content type can be forced for next parameter with --progress, --icon, --color, --text or --target with
# something like:

widget --text "50/100"

# You can update several complications at once by using --target to send all parameters until the next --target to a particular complication. Updating several complications at once allows more total updates per day.

# EOF
# return 0
# fi

# local key=d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b
# local user=WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm
# local iv=ab5bbeb426015da7eedcee8bee3dffb7

# local plain=$(
# echo Secure ShellFish Widget 2.0
# for var in "$@"
# do
# echo -ne "$var" | base64
# done)
# local base64=$(echo "$plain" | openssl enc -aes-256-cbc -base64 -K $key -iv $iv)
# curl -sS -X POST -H "Content-Type: text/plain" --data "$base64" "https://secureshellfish.app/push/?user=$user"
|
17
sijapi/helpers/obsidian/month_o_banners.sh
Executable file
17
sijapi/helpers/obsidian/month_o_banners.sh
Executable file
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash

# Request a banner for every day of June 2024: iterate over the day numbers
# 01 through 30 (zero-padded by `seq -w`)
for i in $(seq -w 01 30); do
    # Construct the date string
    DATE="2024-06-${i}"

    # Print the date being processed (optional)
    echo "Processing date: $DATE"

    # POST a fixed "joyful" mood to the local banner endpoint for this date
    curl -X POST -H "Content-Type: application/json" -d '{"mood": "joyful"}' "http://localhost:4444/note/banner?dt=$DATE"

    # curl runs in the foreground, so it has already finished at this point;
    # `wait` only waits for background jobs and none are started in this script
    wait
done
|
||||
|
71
sijapi/helpers/obsidian/populateWorklogs.py
Normal file
71
sijapi/helpers/obsidian/populateWorklogs.py
Normal file
|
@ -0,0 +1,71 @@
|
|||
import requests
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from sijapi import BASE_URL, OBSIDIAN_JOURNAL_DIR, GLOBAL_API_KEY
|
||||
|
||||
def create_folder_path(date):
    """Create the journal folder for a date and return the path of its note.

    The folder is OBSIDIAN_JOURNAL_DIR/<year>/<year-month monthname>/<date weekday>
    and the returned note is <date weekday>.md inside it. The folder is created
    if it is missing.
    """
    year, month, day = (
        date.strftime(fmt) for fmt in ("%Y", "%Y-%m %B", "%Y-%m-%d %A")
    )
    folder = f"{OBSIDIAN_JOURNAL_DIR}/{year}/{month}/{day}"
    os.makedirs(folder, exist_ok=True)
    return f"{folder}/{day}.md"
|
||||
|
||||
def fetch_markdown(date):
    """Fetch the markdown for one day from the /time/markdown2 endpoint.

    Returns the response text on HTTP 200; on any other status a message is
    printed and None is returned.
    """
    day = date.strftime('%Y-%m-%d')
    response = requests.get(
        f"{BASE_URL}/time/markdown2?start_date={day}",
        headers={'Authorization': f'Bearer {GLOBAL_API_KEY}'},
    )
    if response.status_code != 200:
        print(f"Failed to fetch data for {day}: HTTP {response.status_code}")
        return None
    return response.text
|
||||
|
||||
def update_markdown_file(file_path, new_markdown):
    """Write new_markdown into the note at file_path.

    If the note contains a "## Timeslips" table (header row, dash separator,
    data rows and a closing TOTAL row), every such table is replaced by
    new_markdown. Otherwise new_markdown is appended after a newline. If the
    file does not exist it is created with new_markdown as its only content.

    Args:
        file_path: Path of the markdown note.
        new_markdown: Replacement text, inserted verbatim.
    """
    table_regex = r"(?ms)^## Timeslips\s*\n{1,3}(\|[^|\r\n]*\|[^|\r\n]*\|[^|\r\n]*\|\s*\n)+\|\s*-+\s*\|\s*-+\s*\|\s*-+\s*\|\s*\n((\|[^|\r\n]*\|[^|\r\n]*\|[^|\r\n]*\|\s*\n)+)\|\s*TOTAL\s*\|\s*\|\s*[^|\r\n]*\|\s*$"

    try:
        with open(file_path, 'r+') as file:
            content = file.read()
            # A callable replacement inserts the text literally. Passing the
            # markdown as a template string would make re interpret any
            # backslash or group reference inside it.
            updated_content, replaced = re.subn(table_regex, lambda _match: new_markdown, content)
            if not replaced:
                updated_content = content + '\n' + new_markdown

            # Rewrite in place; truncate drops leftovers when the new text is shorter
            file.seek(0)
            file.write(updated_content)
            file.truncate()
    except FileNotFoundError:
        with open(file_path, 'w') as file:
            file.write(new_markdown)
|
||||
|
||||
def main():
    """Update the journal note of every day from start_date to end_date inclusive.

    Usage: python populateWorklogs.py <start_date> [end_date], with dates given
    as YYYY-MM-DD. When end_date is omitted only start_date is processed.
    """
    argc = len(sys.argv)
    if not 2 <= argc <= 3:
        print("Usage: python populateWorklogs.py <start_date> [end_date]")
        sys.exit(1)

    start_date_str = sys.argv[1]
    end_date_str = sys.argv[2] if argc == 3 else start_date_str

    first_day = datetime.strptime(start_date_str, '%Y-%m-%d')
    last_day = datetime.strptime(end_date_str, '%Y-%m-%d')

    # Both dates are at midnight, so the day difference is exact; an end date
    # before the start date yields an empty range.
    for offset in range((last_day - first_day).days + 1):
        day = first_day + timedelta(days=offset)
        file_path = create_folder_path(day)
        markdown = fetch_markdown(day)
        if markdown:
            update_markdown_file(file_path, markdown)


if __name__ == "__main__":
    main()
|
||||
|
2373
sijapi/helpers/scrapers/Readability.js
Normal file
2373
sijapi/helpers/scrapers/Readability.js
Normal file
File diff suppressed because it is too large
Load diff
65
sijapi/logs.py
Normal file
65
sijapi/logs.py
Normal file
|
@ -0,0 +1,65 @@
|
|||
import logging
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from colorama import Fore, Style, init as colorama_init
|
||||
import traceback
|
||||
|
||||
colorama_init(autoreset=True)
|
||||
|
||||
class ColorFormatter(logging.Formatter):
    """Custom formatter to add colors to log levels."""

    # Colour applied per level; levels missing here fall back to white
    COLOR_MAP = {
        logging.DEBUG: Fore.GREEN,
        logging.INFO: Fore.LIGHTBLUE_EX,
        logging.WARNING: Fore.YELLOW,
        logging.ERROR: Fore.RED,
        logging.CRITICAL: Fore.MAGENTA,
    }

    def format(self, record):
        """Return the formatted record with colour codes added.

        INFO records get only the level name coloured; every other level gets
        both the level name and the message coloured.
        """
        # Work on a copy: the same record object is handed to every handler of
        # the logger, so colouring it in place would leak the escape codes into
        # their output as well.
        record = logging.makeLogRecord(record.__dict__)
        if record.levelno == logging.INFO:
            # Colorize only the level name for INFO
            record.levelname = f"{self.COLOR_MAP[logging.INFO]}{record.levelname}{Style.RESET_ALL}"
            record.msg = f"{record.msg}"
        else:
            # Colorize the entire message for other levels
            color = self.COLOR_MAP.get(record.levelno, Fore.WHITE)
            record.msg = f"{color}{record.msg}{Style.RESET_ALL}"
            record.levelname = f"{color}{record.levelname}{Style.RESET_ALL}"
        return super().format(record)
|
||||
|
||||
def get_level(level_str):
    """Translate a level name into the matching logging constant.

    The name is matched case-insensitively against DEBUG, INFO, WARNING, ERROR
    and CRITICAL.

    Raises:
        ValueError: If the name is none of the five levels.
    """
    level_str = level_str.upper()
    known_levels = {
        "DEBUG": logging.DEBUG,
        "INFO": logging.INFO,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "CRITICAL": logging.CRITICAL,
    }
    if level_str not in known_levels:
        raise ValueError(f"Invalid log level: {level_str}")
    return known_levels[level_str]
|
||||
|
||||
class Logger:
    """Configure the shared "LOG" logger with a rotating file and a console handler."""

    def __init__(self, logs_dir, log_level_str="INFO"):
        """Store the settings and configure the logger immediately.

        Args:
            logs_dir: Directory in which app.log is written.
            log_level_str: Level name accepted by get_level.
        """
        self.logs_dir = logs_dir
        self.log_level_str = log_level_str
        self.logger = logging.getLogger("LOG")
        self.setup()

    def setup(self):
        """Function to setup the logger, clears any existing handlers first"""
        log_level = get_level(self.log_level_str)
        self.logger.setLevel(log_level)

        # logging.getLogger("LOG") returns the same object on every call, so
        # without this each new Logger() would add another pair of handlers and
        # every message would be emitted several times.
        for stale_handler in list(self.logger.handlers):
            self.logger.removeHandler(stale_handler)
            stale_handler.close()

        handler = RotatingFileHandler(f'{self.logs_dir}/app.log', maxBytes=2000000, backupCount=10)
        console_handler = logging.StreamHandler()
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        color_formatter = ColorFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        console_handler.setFormatter(color_formatter)
        self.logger.addHandler(handler)
        self.logger.addHandler(console_handler)
        # Keep records out of the root logger's handlers
        self.logger.propagate = False
|
165
sijapi/routers/asr.py
Normal file
165
sijapi/routers/asr.py
Normal file
|
@ -0,0 +1,165 @@
|
|||
'''
|
||||
Automatic Speech Recognition module relying on the `whisper_cpp` implementation of OpenAI's Whisper model.
|
||||
Depends on:
|
||||
LOGGER, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR
|
||||
Notes:
|
||||
Performs exceptionally well on Apple Silicon. Other devices will benefit from future updates to optionally use `faster_whisper`, `insanely_faster_whisper`, and/or `whisper_jax`.
|
||||
'''
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Form, UploadFile, File
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional
|
||||
import tempfile
|
||||
from fastapi.responses import JSONResponse, FileResponse
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
from whisperplus.pipelines import mlx_whisper
|
||||
from youtube_dl import YoutubeDL
|
||||
from urllib.parse import unquote
|
||||
import subprocess
|
||||
import os
|
||||
import uuid
|
||||
from threading import Thread
|
||||
import multiprocessing
|
||||
import asyncio
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR, MAX_CPU_CORES
|
||||
|
||||
|
||||
asr = APIRouter()
|
||||
|
||||
class TranscribeParams(BaseModel):
    # Request options for /asr; transcribe_audio maps each field onto a
    # whisper.cpp command-line flag (noted per field below).
    model: str = Field(default="small")  # falls back to 'small' if not in WHISPER_CPP_MODELS
    output_srt: Optional[bool] = Field(default=False)  # -osrt
    language: Optional[str] = Field(None)  # -l <language>
    split_on_word: Optional[bool] = Field(default=False)  # -sow
    temperature: Optional[float] = Field(default=0)  # -tp, only sent when > 0
    # -tpi takes a fractional increment (whisper.cpp default 0.2), so this must
    # be a float; the previous int annotation rejected or truncated such values.
    temp_increment: Optional[float] = Field(None)
    translate: Optional[bool] = Field(default=False)  # -tr
    diarize: Optional[bool] = Field(default=False)  # -di
    tiny_diarize: Optional[bool] = Field(default=False)  # -tdrz
    no_fallback: Optional[bool] = Field(default=False)  # -nf
    output_json: Optional[bool] = Field(default=False)  # -oj (ignored when output_srt is set)
    detect_language: Optional[bool] = Field(default=False)  # -dl (ignored when language is set)
    dtw: Optional[str] = Field(None)  # --dtw <value>
    threads: Optional[int] = Field(None)  # -t, clamped to 1..MAX_CPU_CORES
|
||||
|
||||
from urllib.parse import unquote
|
||||
import json
|
||||
|
||||
@asr.post("/asr")
@asr.post("/transcribe")
@asr.post("/v1/audio/transcription")
async def transcribe_endpoint(
    file: UploadFile = File(...),
    params: str = Form(...)
):
    """Transcribe an uploaded audio file.

    Args:
        file: the uploaded audio.
        params: URL-encoded JSON object whose keys match ``TranscribeParams``.

    Returns:
        Whatever ``transcribe_audio`` returns for the upload.

    Raises:
        HTTPException: 400 when ``params`` is not valid JSON or does not fit
            ``TranscribeParams``.
    """
    try:
        # The form field is URL-encoded JSON: decode, parse, then validate.
        decoded_params = unquote(params)
        parameters_dict = json.loads(decoded_params)
        parameters = TranscribeParams(**parameters_dict)
    except json.JSONDecodeError as json_err:
        raise HTTPException(status_code=400, detail=f"Invalid JSON: {str(json_err)}")
    except Exception as err:
        raise HTTPException(status_code=400, detail=f"Error parsing parameters: {str(err)}")

    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(await file.read())
        temp_file_path = temp_file.name

    try:
        transcription = await transcribe_audio(file_path=temp_file_path, params=parameters)
        return transcription
    finally:
        # delete=False means nobody else removes the upload copy; without this
        # every request leaves a file behind in the system temp directory.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
|
||||
|
||||
async def transcribe_audio(file_path, params: TranscribeParams):
    """Run the whisper.cpp ``main`` binary on an audio file and return its stdout.

    The input is first converted with ``convert_to_wav``; the command line is
    then assembled from ``params`` and executed as a subprocess.

    Args:
        file_path: path of the audio file to transcribe.
        params: transcription options.

    Returns:
        The decoded, stripped standard output of the whisper.cpp process.

    Raises:
        Exception: if the process exits with a non-zero status (message carries stderr).
    """
    file_path = convert_to_wav(file_path)
    model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
    model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'
    command = [str(WHISPER_CPP_DIR / 'build' / 'bin' / 'main')]
    command.extend(['-m', str(model_path)])
    # Use the requested thread count, but never fewer than 1 or more than MAX_CPU_CORES.
    command.extend(['-t', str(max(1, min(params.threads or MAX_CPU_CORES, MAX_CPU_CORES)))])
    command.extend(['-np'])  # Always enable no-prints

    if params.split_on_word:
        command.append('-sow')
    # temperature is Optional: an explicit null must not reach the comparison.
    if params.temperature is not None and params.temperature > 0:
        command.extend(['-tp', str(params.temperature)])
    if params.temp_increment:
        command.extend(['-tpi', str(params.temp_increment)])
    # An explicit language takes precedence over auto-detection.
    if params.language:
        command.extend(['-l', params.language])
    elif params.detect_language:
        command.append('-dl')
    if params.translate:
        command.append('-tr')
    if params.diarize:
        command.append('-di')
    if params.tiny_diarize:
        command.append('-tdrz')
    if params.no_fallback:
        command.append('-nf')
    # Output mode: SRT wins over JSON; with neither, '-nt' is passed instead.
    if params.output_srt:
        command.append('-osrt')
    elif params.output_json:
        command.append('-oj')
    else:
        command.append('-nt')
    if params.dtw:
        command.extend(['--dtw', params.dtw])

    command.extend(['-f', file_path])

    DEBUG(f"Command: {command}")
    proc = await asyncio.create_subprocess_exec(
        *command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )
    stdout, stderr = await proc.communicate()

    if proc.returncode != 0:
        raise Exception(f"Error running command: {stderr.decode()}")

    result = stdout.decode().strip()
    DEBUG(f"Result: {result}")
    return result
|
||||
|
||||
|
||||
def convert_to_wav(file_path: str):
    """Transcode ``file_path`` with ffmpeg into a new WAV file under ASR_DIR.

    The output is 16-bit PCM, 16 kHz, mono, named with a random UUID.

    Returns:
        Path of the WAV file that was written.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    wav_file_path = os.path.join(ASR_DIR, f"{uuid.uuid4()}.wav")
    ffmpeg_command = [
        "ffmpeg", "-y",
        "-i", file_path,
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        wav_file_path,
    ]
    subprocess.run(ffmpeg_command, check=True)
    return wav_file_path
|
||||
def download_from_youtube(url: str):
    """Download the audio of ``url`` with youtube_dl and return it converted to WAV.

    The download is written to a UUID-named ``.mp3`` path under ASR_DIR and
    then handed to ``convert_to_wav``.

    Returns:
        Path of the converted WAV file.
    """
    download_target = os.path.join(ASR_DIR, f"{uuid.uuid4()}.mp3")
    audio_postprocessor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        'outtmpl': download_target,
        'postprocessors': [audio_postprocessor],
        'nooverwrites': True,
    }
    with YoutubeDL(options) as downloader:
        downloader.download([url])
    return convert_to_wav(download_target)
|
||||
|
||||
def format_srt_timestamp(seconds: float):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond before being split into
    hours, minutes, seconds and milliseconds.
    """
    total_ms = round(seconds * 1000.0)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, millis = divmod(remainder, 1_000)
    return f"{hours:02}:{minutes:02}:{whole_seconds:02},{millis:03}"
|
||||
|
||||
def write_srt(segments: list, output_file: str):
    """Write ``segments`` to ``output_file`` in SubRip (SRT) format.

    Args:
        segments: dicts with ``'start'`` and ``'end'`` (seconds) and ``'text'``.
        output_file: destination path; overwritten if it exists.
    """
    # Transcripts routinely contain non-ASCII text; relying on the platform's
    # default encoding can raise UnicodeEncodeError or produce unreadable files.
    with open(output_file, 'w', encoding='utf-8') as f:
        for i, segment in enumerate(segments, start=1):
            start = format_srt_timestamp(segment['start'])
            end = format_srt_timestamp(segment['end'])
            text = segment['text']
            # Cue number, time range, text, then the blank line that ends the cue.
            f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
|
415
sijapi/routers/calendar.py
Normal file
415
sijapi/routers/calendar.py
Normal file
|
@ -0,0 +1,415 @@
|
|||
'''
|
||||
Calendar module using macOS Calendars and/or Microsoft 365 via its Graph API.
|
||||
Depends on:
|
||||
LOGGER, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
|
||||
'''
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Request
|
||||
from fastapi.responses import RedirectResponse, JSONResponse
|
||||
from fastapi.security import OAuth2PasswordBearer
|
||||
import httpx
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from dateutil.parser import isoparse as parse_iso
|
||||
import threading
|
||||
from typing import Dict, List, Any
|
||||
from datetime import datetime, timedelta
|
||||
from Foundation import NSDate, NSRunLoop
|
||||
import EventKit as EK
|
||||
from sijapi import ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
|
||||
from sijapi.utilities import localize_dt, localize_dt
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
|
||||
# Router that collects the calendar and Microsoft 365 endpoints defined below.
calendar = APIRouter()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token")
# Shared timeout passed to the httpx clients that use `timeout=timeout` below.
timeout = httpx.Timeout(12)

# Runs at import time: when the MS365 integration is switched on, emit a
# CRITICAL-level log line pointing at the login URL.
if MS365_TOGGLE is True:
    CRITICAL(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
|
||||
|
||||
@calendar.get("/o365/login")
async def login():
    """Redirect the caller to the Microsoft 365 OAuth2 authorization page.

    Raises:
        HTTPException: 500 when MS365_SCOPE is empty.
    """
    DEBUG(f"Received request to /o365/login")
    DEBUG(f"SCOPE: {MS365_SCOPE}")
    if MS365_SCOPE:
        scope_param = '+'.join(MS365_SCOPE)
        authorization_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize?client_id={MS365_CLIENT_ID}&response_type=code&redirect_uri={MS365_REDIRECT_PATH}&scope={scope_param}"
        INFO(f"Redirecting to authorization URL: {authorization_url}")
        return RedirectResponse(authorization_url)

    ERR("No scopes defined for authorization.")
    raise HTTPException(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        detail="No scopes defined for authorization."
    )
|
||||
|
||||
@calendar.get("/o365/oauth_redirect")
async def oauth_redirect(code: str = None, error: str = None):
    """Handle the OAuth2 redirect: exchange the authorization code for a token and store it.

    Args:
        code: authorization code supplied as a query parameter.
        error: error identifier supplied as a query parameter, if any.

    Returns:
        A confirmation message once the token has been passed to ``save_token``.

    Raises:
        HTTPException: 400 when ``error`` is set or ``code`` is missing;
            500 when the token endpoint response has no ``access_token``.
    """
    INFO(f"Received request to /o365/oauth_redirect")
    if error:
        ERR(f"OAuth2 Error: {error}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error"
        )
    if not code:
        # Without a code the token request below can only fail.
        ERR("OAuth2 redirect received without an authorization code")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="Missing authorization code"
        )
    # The code and the token response are credentials; keep them out of the logs.
    INFO("Requesting token with authorization code")
    token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token"
    data = {
        "client_id": MS365_CLIENT_ID,
        "client_secret": MS365_SECRET,
        "code": code,
        "redirect_uri": MS365_REDIRECT_PATH,
        "grant_type": "authorization_code"
    }
    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(token_url, data=data)
    DEBUG(f"Token endpoint response status code: {response.status_code}")
    result = response.json()
    if 'access_token' in result:
        await save_token(result)
        INFO("Access token obtained successfully")
        return {"message": "Access token stored successfully"}
    else:
        # This branch has no access token in the payload, so it is logged in full.
        CRITICAL(f"Failed to obtain access token. Response: {result}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to obtain access token"
        )
|
||||
|
||||
@calendar.get("/o365/me")
async def read_items():
    """Return the Microsoft Graph ``/me`` profile for the stored token.

    Raises:
        HTTPException: 401 when no token is stored or Graph does not answer 200.
    """
    INFO(f"Received request to /o365/me")
    token = await load_token()
    if not token:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Access token not found",
        )

    auth_headers = {"Authorization": f"Bearer {token['access_token']}"}
    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.get("https://graph.microsoft.com/v1.0/me", headers=auth_headers)

    if response.status_code != 200:
        ERR("Invalid or expired token")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or expired token",
            headers={"WWW-Authenticate": "Bearer"},
        )

    user = response.json()
    INFO(f"User retrieved: {user}")
    return user
|
||||
|
||||
async def save_token(token):
    """Stamp ``token`` with an absolute expiry and write it to MS365_TOKEN_PATH as JSON.

    ``token`` is modified in place: ``expires_at`` is set to the current epoch
    time plus ``token["expires_in"]``. Failures are logged, not raised.
    """
    # The token holds bearer credentials, so its contents are not logged.
    DEBUG("Saving token")
    try:
        token["expires_at"] = int(time.time()) + token["expires_in"]
        with open(MS365_TOKEN_PATH, "w") as file:
            json.dump(token, file)
        DEBUG(f"Saved token to {MS365_TOKEN_PATH}")
    except Exception as e:
        ERR(f"Failed to save token: {e}")
|
||||
|
||||
async def load_token():
    """Read the stored Microsoft 365 token from MS365_TOKEN_PATH.

    Returns:
        The token dict as saved (including the ``expires_at`` written by
        ``save_token``), or ``None`` when the file is missing, is not valid
        JSON, or holds an empty value.
    """
    try:
        with open(MS365_TOKEN_PATH, "r") as file:
            token = json.load(file)
    except FileNotFoundError:
        WARN(f"No file found at {MS365_TOKEN_PATH}")
        return None
    except json.JSONDecodeError:
        ERR("Failed to decode token JSON")
        return None

    if not token:
        DEBUG("No token found.")
        return None

    # Leave the stored `expires_at` alone. Recomputing it here as
    # now + expires_in made every loaded token look freshly issued, so
    # is_token_expired() could never report an expired token.
    # The token contents are credentials and are therefore not logged.
    DEBUG("Loaded token from storage")
    return token
|
||||
|
||||
|
||||
async def is_token_expired(token):
    """Report whether ``token`` is past its ``expires_at`` timestamp.

    A token without an ``expires_at`` entry is treated as expired.
    """
    if "expires_at" in token:
        return datetime.fromtimestamp(token["expires_at"]) <= datetime.now()
    return True
|
||||
|
||||
async def is_token_expired2(token):
    """Probe Microsoft Graph ``/me`` with ``token`` and report whether it answers 401.

    Here ``token`` is the bearer string itself; it is placed directly in the
    Authorization header.
    """
    async with httpx.AsyncClient(timeout=timeout) as client:
        probe = await client.get(
            "https://graph.microsoft.com/v1.0/me",
            headers={"Authorization": f"Bearer {token}"},
        )
    return probe.status_code == 401
|
||||
|
||||
async def get_new_token_with_refresh_token(refresh_token):
    """Request a new token from the MS365 token endpoint using a refresh token.

    Returns:
        The decoded JSON response when it contains ``access_token``, else ``None``.
    """
    form_fields = {
        "client_id": MS365_CLIENT_ID,
        "client_secret": MS365_SECRET,
        "refresh_token": refresh_token,
        "grant_type": "refresh_token",
        "scope": " ".join(MS365_SCOPE),
    }
    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token", data=form_fields)

    result = response.json()
    if "access_token" not in result:
        ERR("Failed to refresh access token")
        return None

    INFO("Access token refreshed successfully")
    return result
|
||||
|
||||
|
||||
async def refresh_token():
    """Refresh the stored Microsoft 365 token and save the replacement.

    Raises:
        HTTPException: 401 when no token is stored or it has no
            ``refresh_token`` entry; 500 when the refresh request yields no token.
    """
    token = await load_token()
    if not token:
        ERR("No token found in storage")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="No token found",
        )

    if 'refresh_token' not in token:
        ERR("Refresh token not found in the loaded token")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Refresh token not found",
        )

    stored_refresh_token = token['refresh_token']
    DEBUG("Found refresh token, attempting to refresh access token")

    new_token = await get_new_token_with_refresh_token(stored_refresh_token)
    if not new_token:
        ERR("Failed to refresh token")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to refresh token",
        )

    await save_token(new_token)
    INFO("Token refreshed and saved successfully")
|
||||
|
||||
|
||||
def get_calendar_ids() -> Dict[str, str]:
    """Return a mapping of calendar title to calendar identifier from EventKit.

    The mapping is also logged at INFO level. Calendars that share a title
    overwrite one another, the last one winning.
    """
    event_store = EK.EKEventStore.alloc().init()
    calendar_identifiers = {}
    # 0 corresponds to EKEntityTypeEvent
    for cal in event_store.calendarsForEntityType_(0):
        title = cal.title()
        calendar_identifiers[title] = cal.calendarIdentifier()
    INFO(f"{calendar_identifiers}")
    return calendar_identifiers
|
||||
|
||||
# Helper to convert datetime to NSDate
|
||||
def datetime_to_nsdate(dt: datetime) -> NSDate:
    """Build an ``NSDate`` from the POSIX timestamp of ``dt``."""
    epoch_seconds = dt.timestamp()
    return NSDate.dateWithTimeIntervalSince1970_(epoch_seconds)
|
||||
|
||||
|
||||
@calendar.get("/events")
async def get_events_endpoint(start_date: str, end_date: str):
    """Return the calendar events between ``start_date`` and ``end_date`` as JSON.

    Args:
        start_date: start of the range, as a date string (e.g. ``YYYY-MM-DD``).
        end_date: end of the range, as a date string.

    Raises:
        HTTPException: 400 when either date cannot be interpreted.
    """
    # Previously one strptime() result was discarded and a second overwrote the
    # localized end_dt with a naive datetime; both bounds now come from
    # localize_dt so they are handled the same way.
    try:
        start_dt = localize_dt(start_date)
        end_dt = localize_dt(end_date)
    except (ValueError, TypeError) as e:
        # NOTE(review): assumes localize_dt signals bad input with ValueError or
        # TypeError, as parse_calendar_for_day expects - confirm in utilities.
        ERR(f"Invalid date range: {start_date} to {end_date}, error: {e}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid start_date or end_date",
        )
    response = await get_events(start_dt, end_dt)
    return JSONResponse(content=response, status_code=200)
|
||||
|
||||
async def get_events(start_dt: datetime, end_dt: datetime) -> List:
    """Collect events from every enabled calendar source and normalise them.

    Microsoft 365 events are fetched when MS365_TOGGLE is set and macOS
    calendar events (restricted to ICALENDARS) when ICAL_TOGGLE is set; the
    combined list is then passed through ``parse_calendar_for_day``.
    """
    combined_events = []

    if MS365_TOGGLE:
        combined_events.extend(await get_ms365_events(start_dt, end_dt))

    if ICAL_TOGGLE:
        combined_events.extend(get_macos_calendar_events(start_dt, end_dt, ICALENDARS))

    return await parse_calendar_for_day(start_dt, end_dt, combined_events)
|
||||
|
||||
|
||||
def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]:
    """Fetch events from the macOS calendar store through EventKit.

    Args:
        start_date: start of the query window.
        end_date: end of the query window.
        calendar_ids: calendar identifiers to include; all calendars when empty.

    Returns:
        One dict per event, using the same keys the Microsoft 365 events carry
        (``subject``, ``id``, ``start``, ``end``, ``bodyPreview``, ...). Returns
        an empty list when calendar access is not granted.
    """
    event_store = EK.EKEventStore.alloc().init()

    def request_access() -> bool:
        """Ask EventKit for event access and wait up to 10 s for the answer."""
        responses = []
        handler_done = threading.Condition()

        def completion_handler(granted, error):
            if error is not None:
                ERR(f"Error: {error}")
            responses.append(granted)
            # Wake the caller blocked in wait() below.
            with handler_done:
                handler_done.notify()

        with handler_done:
            # 0 corresponds to EKEntityTypeEvent
            event_store.requestAccessToEntityType_completion_(0, completion_handler)
            handler_done.wait(timeout=10)
            # An empty list means the handler never ran before the timeout.
            if not responses:
                ERR("Request access timed out or failed")
                return False
            return responses[0]

    if not request_access():
        ERR("Access to calendar data was not granted")
        return []

    ns_start_date = datetime_to_nsdate(start_date)
    ns_end_date = datetime_to_nsdate(end_date)

    # 0 corresponds to EKEntityTypeEvent
    all_calendars = event_store.calendarsForEntityType_(0)
    if calendar_ids:
        selected_calendars = [cal for cal in all_calendars if cal.calendarIdentifier() in calendar_ids]
    else:
        selected_calendars = all_calendars

    predicate = event_store.predicateForEventsWithStartDate_endDate_calendars_(ns_start_date, ns_end_date, selected_calendars)
    matching_events = event_store.eventsMatchingPredicate_(predicate)

    event_list = []
    for event in matching_events:
        # attendees() may return None; only attendees with an email address are kept.
        participants = event.attendees()
        if participants:
            attendees = [
                {'name': person.name(), 'email': person.emailAddress()}
                for person in participants
                if person.emailAddress()
            ]
        else:
            attendees = []

        event_list.append({
            "subject": event.title(),
            "id": event.eventIdentifier(),
            "start": event.startDate().descriptionWithLocale_(None),
            "end": event.endDate().descriptionWithLocale_(None),
            "bodyPreview": event.notes() or '',
            "attendees": attendees,
            "location": event.location() or '',
            "onlineMeetingUrl": '',  # Defaulting to empty as macOS EventKit does not provide this
            "showAs": 'busy',  # Default to 'busy'
            "isAllDay": event.isAllDay()
        })

    return event_list
|
||||
|
||||
async def get_ms365_events(start_date: datetime, end_date: datetime):
    """Fetch Microsoft 365 calendar events between two dates via Microsoft Graph.

    Args:
        start_date: first day of the range (only its calendar date is used).
        end_date: last day of the range (only its calendar date is used).

    Returns:
        The ``value`` list of the Graph response (empty list if absent).

    Raises:
        HTTPException: 401 when no token is available; 500 when Graph does not
            answer with status 200.
    """
    token = await load_token()
    if not token:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Access token not found",
        )
    if await is_token_expired(token):
        await refresh_token()
        # refresh_token() only persists the new token, so reload it; otherwise
        # the request below would still carry the expired access token.
        token = await load_token()
        if not token:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Access token not found",
            )

    # Interpolating the datetime objects directly produced strings such as
    # "2024-01-01 00:00:00T00:00:00"; only the date part belongs in the filter.
    # NOTE(review): the bounds are not converted to UTC; they are read in the
    # timezone named by the Prefer header - confirm this is intended.
    start_day = start_date.strftime('%Y-%m-%d')
    end_day = end_date.strftime('%Y-%m-%d')
    graph_url = f"https://graph.microsoft.com/v1.0/me/events?$filter=start/dateTime ge '{start_day}T00:00:00' and end/dateTime le '{end_day}T23:59:59'"
    headers = {
        "Authorization": f"Bearer {token['access_token']}",
        "Prefer": 'outlook.timezone="Pacific Standard Time"',
    }
    # Use the module-wide timeout like the other Graph calls in this file.
    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.get(graph_url, headers=headers)

    if response.status_code != 200:
        ERR("Failed to retrieve events from Microsoft 365")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to retrieve events",
        )

    ms_events = response.json().get("value", [])
    return ms_events
|
||||
|
||||
|
||||
def _normalize_attendees(raw_attendees):
    """Flatten attendee records from either calendar source into name/email dicts."""
    attendees = []
    for att in raw_attendees or []:
        if not isinstance(att, dict):
            continue
        # Shape produced by get_macos_calendar_events: {'name': ..., 'email': ...}
        if 'name' in att and 'email' in att:
            attendees.append({'name': att['name'], 'email': att['email']})
            continue
        # Microsoft Graph shape: {'emailAddress': {'name': ..., 'address': ...}, ...}
        email_address = att.get('emailAddress')
        if isinstance(email_address, dict) and email_address.get('address'):
            attendees.append({
                'name': email_address.get('name', ''),
                'email': email_address['address'],
            })
    return attendees


async def parse_calendar_for_day(range_start: datetime, range_end: datetime, events: List[Dict[str, Any]]):
    """Normalise raw calendar events and keep those overlapping a time range.

    Args:
        range_start: start of the range.
        range_end: end of the range.
        events: raw event dicts; ``start``/``end`` may be strings or dicts with
            a ``dateTime`` entry.

    Returns:
        A list of dicts with the keys ``name``, ``uid``, ``start``, ``end``
        (both ``HH:MM``), ``description``, ``attendees``, ``location``,
        ``url``, ``busystatus``, ``busy`` and ``all_day``. Events with an
        unparseable or missing start are logged and skipped.
    """
    range_start = localize_dt(range_start)
    range_end = localize_dt(range_end)
    event_list = []

    for event in events:
        INFO(f"Event: {event}")
        start_str = event.get('start')
        end_str = event.get('end')

        if isinstance(start_str, dict):
            start_str = start_str.get('dateTime')
        else:
            INFO(f"Start date string not a dict")

        if isinstance(end_str, dict):
            end_str = end_str.get('dateTime')
        else:
            INFO(f"End date string not a dict")

        try:
            start_date = localize_dt(start_str) if start_str else None
        except (ValueError, TypeError) as e:
            ERR(f"Invalid start date format: {start_str}, error: {e}")
            continue

        try:
            end_date = localize_dt(end_str) if end_str else None
        except (ValueError, TypeError) as e:
            ERR(f"Invalid end date format: {end_str}, error: {e}")
            continue

        DEBUG(f"Comparing {start_date} with range {range_start} to {range_end}")

        if not start_date:
            ERR(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
            continue

        # Ensure start_date is timezone-aware
        start_date = localize_dt(start_date)

        # If end_date is not provided, assume it's the same as start_date
        if not end_date:
            end_date = start_date
        else:
            end_date = localize_dt(end_date)

        # Check if the event overlaps with the given range
        if (start_date < range_end) and (end_date > range_start):
            # The old filter required top-level 'name'/'email' keys, which
            # silently dropped every Microsoft Graph attendee.
            attendees = _normalize_attendees(event.get('attendees', []))
            location = event.get('location', '')
            if isinstance(location, dict):
                location = location.get('displayName', '')

            event_data = {
                "name": event.get('subject', ''),
                "uid": event.get('id', ''),
                "start": start_date.strftime('%H:%M'),
                "end": end_date.strftime('%H:%M') if end_date else '',
                "description": event.get('bodyPreview', ''),
                "attendees": attendees,
                "location": location,
                "url": event.get('onlineMeetingUrl', ''),
                "busystatus": event.get('showAs', ''),
                "busy": event.get('showAs', '') in ['busy', 'tentative'],
                "all_day": event.get('isAllDay', False)
            }
            INFO(f"Event_data: {event_data}")
            event_list.append(event_data)
        else:
            DEBUG(f"Event outside of specified range: {start_date} to {end_date}")

    return event_list
|
209
sijapi/routers/cf.py
Normal file
209
sijapi/routers/cf.py
Normal file
|
@ -0,0 +1,209 @@
|
|||
'''
|
||||
IN DEVELOPMENT - Cloudflare + Caddy module. Based on a bash script that's able to rapidly deploy new Cloudflare subdomains on new Caddy reverse proxy configurations, managing everything including restarting Caddy. The Python version needs more testing before actual use.
|
||||
'''
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from fastapi.responses import PlainTextResponse, JSONResponse
|
||||
from typing import Optional
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP
|
||||
import httpx
|
||||
import asyncio
|
||||
from asyncio import sleep
|
||||
import os
|
||||
|
||||
cf = APIRouter()
|
||||
|
||||
class DNSRecordRequest(BaseModel):
    # Request body of POST /cf/add_config.
    # Fully-qualified host name; add_config splits it into subdomain and zone.
    full_domain: str
    # Reverse-proxy target host; add_config falls back to "localhost" when omitted.
    ip: Optional[str] = None
    # Reverse-proxy target port, written into the Caddyfile as "<ip>:<port>".
    port: str
|
||||
|
||||
|
||||
# Update to make get_zone_id async
|
||||
async def get_zone_id(domain: str) -> str:
    """Look up the Cloudflare zone ID of ``domain``.

    Returns:
        The ``id`` of the first zone the API returns for that name.

    Raises:
        ValueError: when the API reports errors or returns no zone.
        httpx.HTTPStatusError: when the API answers with an error status.
    """
    request_headers = {
        "Authorization": f"Bearer {CF_TOKEN}",
        "Content-Type": "application/json"
    }
    async with httpx.AsyncClient() as client:
        response = await client.get(
            f"{CF_API_BASE_URL}/zones",
            headers=request_headers,
            params={"name": domain},
        )
        response.raise_for_status()
        payload = response.json()

        if not payload['success']:
            errors = ', '.join(err['message'] for err in payload['errors'])
            raise ValueError(f"Cloudflare API returned errors: {errors}")

        if len(payload['result']) > 0:
            return payload['result'][0]['id']
        raise ValueError(f"No Zone ID found for domain '{domain}'")
|
||||
|
||||
|
||||
|
||||
async def update_caddyfile(full_domain, caddy_ip, port):
    """Append a reverse-proxy site block to the Caddyfile and restart Caddy.

    Args:
        full_domain: site address of the new block.
        caddy_ip: host the site is proxied to.
        port: port the site is proxied to.

    A failed restart is logged with ERR; no exception is raised for it.
    """
    # NOTE(review): Caddy's documented environment placeholder is written
    # {$CLOUDFLARE_API_TOKEN}; the quoted form emitted below is kept as-is and
    # should be verified against a running Caddy.
    caddy_config = f"""
{full_domain} {{
    reverse_proxy {caddy_ip}:{port}
    tls {{
        dns cloudflare {{"$CLOUDFLARE_API_TOKEN"}}
    }}
}}
"""
    with open(CADDYFILE_PATH, 'a') as file:
        file.write(caddy_config)

    # Using asyncio to create subprocess
    proc = await asyncio.create_subprocess_exec("sudo", "systemctl", "restart", "caddy")
    await proc.communicate()
    # The exit status used to be ignored, so a Caddy that refused the new
    # configuration went completely unnoticed.
    if proc.returncode != 0:
        ERR(f"Restarting caddy failed with exit code {proc.returncode}")
|
||||
|
||||
|
||||
# Retry mechanism for API calls
|
||||
async def retry_request(url, headers, max_retries=5, backoff_factor=1):
    """GET ``url`` with exponential backoff between failed attempts.

    Args:
        url: address to request.
        headers: headers sent with every attempt.
        max_retries: total number of attempts.
        backoff_factor: base delay in seconds; attempt ``n`` (0-based) is
            followed by a wait of ``backoff_factor * 2 ** n``.

    Returns:
        The first response with a non-error status.

    Raises:
        HTTPException: 500 once every attempt has failed.
    """
    for retry in range(max_retries):
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(url, headers=headers)
                response.raise_for_status()
                return response
        except (httpx.HTTPError, httpx.ConnectTimeout) as e:
            ERR(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
            # No attempt follows the last failure, so waiting there only
            # delays the error response.
            if retry < max_retries - 1:
                await sleep(backoff_factor * (2 ** retry))
    raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request")
|
||||
|
||||
# Helper function to load Caddyfile domains
|
||||
def load_caddyfile_domains():
    """Return the site addresses that open a top-level block in the Caddyfile.

    Only lines at brace depth zero are considered, so nested directives such
    as ``tls {`` are not mistaken for domains. Blank lines, comment lines and
    blocks without an address are skipped.
    """
    with open(CADDYFILE_PATH, 'r') as file:
        caddyfile_content = file.read()

    domains = []
    depth = 0  # current brace nesting level
    for raw_line in caddyfile_content.splitlines():
        line = raw_line.strip()
        # Test the stripped line so indented comments are ignored too.
        if not line or line.startswith('#'):
            continue
        if depth == 0 and "{" in line:
            domain = line.split("{")[0].strip()
            if domain:
                domains.append(domain)
        # Placeholders like {$VAR} open and close on one line and cancel out.
        depth = max(0, depth + line.count("{") - line.count("}"))
    return domains
|
||||
|
||||
# Endpoint to add new configuration to Cloudflare, Caddyfile, and cf_domains.json
|
||||
@cf.post("/cf/add_config")
async def add_config(record: DNSRecordRequest):
    """Create a Cloudflare A record for a host and add a matching Caddy site block.

    Args:
        record: requested host name, proxy target host (default ``localhost``)
            and proxy target port.

    Returns:
        A confirmation message.

    Raises:
        HTTPException: 400 when a field is malformed, the zone cannot be found
            or Cloudflare rejects the record.
    """
    import re  # local import: only this endpoint needs it

    full_domain = record.full_domain.strip()
    caddy_ip = (record.ip or "localhost").strip()
    port = str(record.port).strip()

    # These values are written verbatim into the Caddyfile; reject anything
    # that could smuggle in whitespace, braces or extra directives.
    if not re.fullmatch(r"(\*\.)?([A-Za-z0-9]([A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)+[A-Za-z0-9-]{2,63}", full_domain):
        raise HTTPException(status_code=400, detail="Invalid domain name")
    if not re.fullmatch(r"[0-9]{1,5}", port) or not 1 <= int(port) <= 65535:
        raise HTTPException(status_code=400, detail="Invalid port")
    if not re.fullmatch(r"[A-Za-z0-9.:\-\[\]]+", caddy_ip):
        raise HTTPException(status_code=400, detail="Invalid ip")

    # Extract subdomain and domain
    parts = full_domain.split(".")
    if len(parts) == 2:
        domain = full_domain
        subdomain = "@"
    else:
        subdomain = parts[0]
        domain = ".".join(parts[1:])

    try:
        zone_id = await get_zone_id(domain)
    except ValueError as err:
        # get_zone_id raises ValueError for unknown zones; report it as a
        # client error instead of an unhandled 500.
        raise HTTPException(status_code=400, detail=str(err))
    if not zone_id:
        raise HTTPException(status_code=400, detail=f"Zone ID for {domain} could not be found")

    # API call setup for Cloudflare A record
    endpoint = f"{CF_API_BASE_URL}/zones/{zone_id}/dns_records"
    headers = {
        "Authorization": f"Bearer {CF_TOKEN}",
        "Content-Type": "application/json"
    }
    data = {
        "type": "A",
        "name": subdomain,
        "content": CF_IP,
        "ttl": 120,
        "proxied": True
    }

    async with httpx.AsyncClient() as client:
        response = await client.post(endpoint, headers=headers, json=data)

    result = response.json()

    if not result.get("success", False):
        first_error = (result.get("errors") or [{}])[0]
        error_message = first_error.get("message", "Unknown error")
        error_code = first_error.get("code", "Unknown code")
        raise HTTPException(status_code=400, detail=f"Failed to create A record: {error_message} (Code: {error_code})")

    # Update Caddyfile
    await update_caddyfile(full_domain, caddy_ip, port)

    return {"message": "Configuration added successfully"}
|
||||
|
||||
|
||||
|
||||
@cf.get("/cf/list_zones")
async def list_zones_endpoint():
    """Return the mapping built by ``list_zones`` as a JSON response."""
    return JSONResponse(await list_zones())
|
||||
|
||||
async def list_zones():
    """Fetch every Cloudflare zone together with the IDs of its DNS records.

    Returns:
        ``{zone_name: {"zone_id": ..., record_name: {"dns_id": ...}, ...}}``
        where ``record_name`` is the record's name with ``.<zone_name>``
        removed (the apex record keeps the zone name itself).

    Raises:
        HTTPException: 400 when Cloudflare reports failure for the zone list
            or for a zone's records.
        httpx.HTTPStatusError: when the zone list request has an error status.
    """
    endpoint = f"{CF_API_BASE_URL}/zones"
    headers = {
        "Authorization": f"Bearer {CF_TOKEN}",
        "Content-Type": "application/json"
    }
    domains = {}

    # One client for the whole listing: the per-zone requests reuse its
    # connection instead of opening a new client for every zone.
    async with httpx.AsyncClient() as client:
        response = await client.get(endpoint, headers=headers)
        response.raise_for_status()
        result = response.json()

        if not result.get("success"):
            raise HTTPException(status_code=400, detail="Failed to retrieve zones from Cloudflare")

        for zone in result.get("result", []):
            zone_id = zone.get("id")
            zone_name = zone.get("name")
            domains[zone_name] = {"zone_id": zone_id}

            records_endpoint = f"{CF_API_BASE_URL}/zones/{zone_id}/dns_records"
            records_response = await client.get(records_endpoint, headers=headers)
            records_result = records_response.json()

            if not records_result.get("success"):
                raise HTTPException(status_code=400, detail=f"Failed to retrieve DNS records for zone {zone_name}")

            for record in records_result.get("result", []):
                record_id = record.get("id")
                # Guard against a record without a name instead of failing on None.
                domain_name = (record.get("name") or "").replace(f".{zone_name}", "")
                domains[zone_name].setdefault(domain_name, {})["dns_id"] = record_id

    return domains
|
||||
|
||||
@cf.get("/cf/compare_caddy", response_class=PlainTextResponse)
async def crossreference_caddyfile():
    """Compare Cloudflare DNS record names with the Caddyfile site addresses.

    Returns:
        A Markdown report listing the names found on only one of the two sides.
    """
    cf_domains_data = await list_zones()
    caddyfile_domains = load_caddyfile_domains()

    # list_zones() stores record names directly as keys of each zone's dict,
    # next to "zone_id". The former lookup of a "subdomains" key never matched
    # anything, so every Caddyfile entry was reported as missing from Cloudflare.
    cf_domains_set = set()
    for domain, data in cf_domains_data.items():
        for sub in data:
            if sub == "zone_id":
                continue
            # The apex record appears under the zone's own name.
            if sub in ("@", domain):
                cf_domains_set.add(domain)
            else:
                cf_domains_set.add(f"{sub}.{domain}")

    caddyfile_domains_set = set(caddyfile_domains)

    only_in_caddyfile = caddyfile_domains_set - cf_domains_set
    only_in_cf_domains = cf_domains_set - caddyfile_domains_set

    # Sorted so the report is stable from one request to the next.
    sections = [
        "# Cross-reference cf_domains.json and Caddyfile\n\n",
        "## Domains only in Caddyfile:\n\n",
    ]
    sections.extend(f"- **{domain}**\n" for domain in sorted(only_in_caddyfile))
    sections.append("\n## Domains only in cf_domains.json:\n\n")
    sections.extend(f"- **{domain}**\n" for domain in sorted(only_in_cf_domains))

    return "".join(sections)
|
253
sijapi/routers/email.py
Normal file
253
sijapi/routers/email.py
Normal file
|
@ -0,0 +1,253 @@
|
|||
'''
|
||||
IN DEVELOPMENT Email module. Uses IMAP and SMTP login credentials to monitor an inbox and summarize incoming emails that match certain criteria and save the Text-To-Speech converted summaries into a specified "podcast" folder.
|
||||
UNIMPLEMENTED: AI auto-responder.
|
||||
'''
|
||||
from fastapi import APIRouter
|
||||
import asyncio
|
||||
from imbox import Imbox
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
from pathlib import Path
|
||||
from shutil import move
|
||||
import tempfile
|
||||
import re
|
||||
import ssl
|
||||
from smtplib import SMTP_SSL
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from datetime import datetime as dt_datetime
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional, Any
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import HOME_DIR, DATA_DIR, OBSIDIAN_VAULT_DIR, PODCAST_DIR, IMAP, OBSIDIAN_JOURNAL_DIR, DEFAULT_VOICE, AUTORESPONSE_BLACKLIST, AUTORESPONSE_WHITELIST, AUTORESPONSE_CONTEXT, USER_FULLNAME, USER_BIO, AUTORESPOND, TZ
|
||||
from sijapi.routers import summarize, tts, llm
|
||||
from sijapi.utilities import clean_text, assemble_journal_path, localize_dt, extract_text, prefix_lines
|
||||
|
||||
|
||||
# APIRouter instance for this module; created with the "private" tag.
email = APIRouter(tags=["private"])
|
||||
|
||||
|
||||
class Contact(BaseModel):
    """An email correspondent: an address together with a display name."""

    email: str
    name: str
|
||||
class EmailModel(BaseModel):
    """One received email message in the normalized form used by this module."""

    # Address of the sender.
    sender: str
    # Everyone the message was addressed to.
    recipients: List[Contact]
    datetime_received: dt_datetime
    subject: str
    # Message text after markup has been stripped.
    body: str
    # Raw attachment entries as provided by the IMAP client, if any.
    attachments: Optional[List[Any]] = None
|
||||
|
||||
def imap_conn():
    """Create an ``Imbox`` client from the ``IMAP`` settings object.

    ``IMAP.imap_encryption`` selects the transport: ``'SSL'`` enables
    implicit TLS and ``'STARTTLS'`` enables the STARTTLS upgrade; any other
    value leaves both flags off.
    """
    encryption = IMAP.imap_encryption
    use_ssl = encryption == 'SSL'
    use_starttls = encryption == 'STARTTLS'
    return Imbox(
        IMAP.host,
        username=IMAP.email,
        password=IMAP.password,
        port=IMAP.imap_port,
        ssl=use_ssl,
        starttls=use_starttls,
    )
|
||||
|
||||
|
||||
def clean_email_content(html_content):
    """Return the visible text of *html_content* with whitespace normalized.

    Markup is removed with BeautifulSoup, every run of spaces, tabs and line
    breaks is collapsed to a single space, and the result is stripped.
    """
    visible_text = BeautifulSoup(html_content, "html.parser").get_text()
    collapsed = re.sub(r'[ \t\r\n]+', ' ', visible_text)
    return collapsed.strip()
|
||||
|
||||
|
||||
async def extract_attachments(attachments) -> List[str]:
    """Extract the text of each attachment via ``extract_text``.

    Every attachment is written to a temporary file that keeps the
    attachment's (lower-cased) extension, defaulting to ``.txt``, and the
    file is removed again once extraction has finished or failed.

    Args:
        attachments: Iterable of mappings with an optional ``'filename'`` and
            a ``'content'`` object exposing ``getvalue()``.

    Returns:
        List[str]: One extracted text per attachment, in input order.
    """
    texts = []
    for item in attachments:
        original_name = item.get('filename', 'tempfile.txt')
        suffix = os.path.splitext(original_name)[1]
        suffix = suffix.lower() if suffix else '.txt'

        # delete=False: the file must outlive the handle so extract_text can open it by path.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
            handle.write(item['content'].getvalue())
            scratch_path = handle.name

        try:
            texts.append(await extract_text(scratch_path))
        finally:
            if os.path.exists(scratch_path):
                os.remove(scratch_path)

    return texts
|
||||
|
||||
|
||||
async def process_unread_emails(auto_respond: bool = AUTORESPOND, summarize_emails: bool = True, podcast: bool = True):
    """Poll the inbox forever and archive every unread message.

    Each pass opens an IMAP connection, and for every unread message:
    builds an ``EmailModel``, optionally summarizes it (including attachment
    text), renders the summary to speech and copies it to the podcast
    directory, saves a markdown note, optionally sends an auto-response, and
    finally marks the message as seen. The loop sleeps 30 seconds between
    passes; any exception is logged and the pass is retried after the same
    delay.

    Args:
        auto_respond: Send an auto-response when ``should_auto_respond`` agrees.
        summarize_emails: Summarize and speak the message before saving it.
        podcast: Copy the generated audio into ``PODCAST_DIR``.
    """
    while True:
        try:
            with imap_conn() as inbox:
                unread_messages = inbox.messages(unread=True)
                for uid, message in unread_messages:
                    recipients = [Contact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to]

                    # Prefer the HTML part, fall back to plain text, and use an
                    # empty body when the message has neither. Indexing an empty
                    # part list would raise, the message would never be marked
                    # seen, and every later pass would fail on it again.
                    html_parts = message.body['html']
                    plain_parts = message.body['plain']
                    if html_parts:
                        body = clean_email_content(html_parts[0])
                    elif plain_parts:
                        body = clean_email_content(plain_parts[0])
                    else:
                        body = ""

                    this_email = EmailModel(
                        sender=message.sent_from[0]['email'],
                        datetime_received=localize_dt(message.date),
                        recipients=recipients,
                        subject=message.subject,
                        body=body,
                        attachments=message.attachments
                    )

                    DEBUG(f"\n\nProcessing email: {this_email.subject}\n\n")
                    md_path, md_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".md")
                    tts_path, tts_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".wav")
                    if summarize_emails:
                        email_content = f'At {this_email.datetime_received}, {this_email.sender} sent an email with the subject line "{this_email.subject}". The email in its entirety reads: \n\n{this_email.body}\n"'
                        if this_email.attachments:
                            attachment_texts = await extract_attachments(this_email.attachments)
                            email_content += "\n—--\n" + "\n—--\n".join([f"Attachment: {text}" for text in attachment_texts])

                        summary = await summarize.summarize_text(email_content)
                        await tts.local_tts(text_content = summary, speed = 1.1, voice = DEFAULT_VOICE, podcast = podcast, output_path = tts_path)

                        if podcast:
                            if PODCAST_DIR.exists():
                                tts.copy_to_podcast_dir(tts_path)
                            else:
                                ERR(f"PODCAST_DIR does not exist: {PODCAST_DIR}")

                        save_email_as_markdown(this_email, summary, md_path, tts_relative)
                    else:
                        save_email_as_markdown(this_email, None, md_path, None)

                    if auto_respond and should_auto_respond(this_email):
                        DEBUG(f"Auto-responding to {this_email.subject}")
                        auto_response_subject = 'Auto-Response Re:' + this_email.subject
                        auto_response_body = await generate_auto_response_body(this_email)
                        DEBUG(f"Auto-response: {auto_response_body}")
                        await send_auto_response(this_email.sender, auto_response_subject, auto_response_body)

                    # Only mark as seen once every step above has succeeded.
                    inbox.mark_seen(uid)

            await asyncio.sleep(30)
        except Exception as e:
            ERR(f"An error occurred: {e}")
            await asyncio.sleep(30)
|
||||
|
||||
|
||||
def save_email_as_markdown(email: EmailModel, summary: str, md_path: Path, tts_path: Path):
    '''
    Saves an email as a markdown note at the given path.

    The note consists of YAML front matter, a header table (received, from,
    to, subject), an optional summary callout, an optional embedded audio
    link, and the email body.

    Args:
        email (EmailModel): The email object containing email details.
        summary (str): The summary of the email, or None to omit the callout.
        md_path (Path): Where the markdown file is written.
        tts_path (Path): Path embedded as an audio link, or None to omit it.
    '''
    # Callers pass None when summarization is disabled; only quote real text.
    if summary is not None:
        summary = prefix_lines(summary, '> ')

    received = email.datetime_received
    # Link each recipient by address, matching how the sender is linked
    # (formatting the Contact model itself would embed its repr in the link).
    recipient_links = ', '.join(f'**[[{recipient.email}]]**' for recipient in email.recipients)

    # Create the markdown content
    markdown_content = f'''---
date: {received.strftime('%Y-%m-%d')}
tags:
- email
---
| | | |
| --: | :--: | :--: |
| *received* | **{received.strftime('%B %d, %Y at %H:%M:%S %Z')}** | |
| *from* | **[[{email.sender}]]** | |
| *to* | {recipient_links} | |
| *subject* | **{email.subject}** | |
'''

    if summary:
        markdown_content += f'''
> [!summary] Summary
> {summary}
'''

    if tts_path:
        markdown_content += f'''
![[{tts_path}]]
'''

    markdown_content += f'''
---
{email.body}
'''

    with open(md_path, 'w', encoding='utf-8') as md_file:
        md_file.write(markdown_content)

    DEBUG(f"Saved markdown to {md_path}")
|
||||
|
||||
|
||||
# System prompt passed to the LLM when generating auto-response bodies.
AUTORESPONSE_SYS = "You are a helpful AI assistant that generates personalized auto-response messages to incoming emails."
|
||||
|
||||
async def generate_auto_response_body(e: EmailModel, response_style: str = "professional") -> str:
    """Ask the LLM to draft an auto-response to the email *e*.

    Args:
        e: The email being answered.
        response_style: Adjective inserted into the prompt to steer the tone.

    Returns:
        str: The generated reply, or a fixed apology text when the LLM call
        raises.
    """
    age = dt_datetime.now(TZ) - e.datetime_received
    prompt = f'''
Please generate a personalized auto-response to the following email. The email is from {e.sender} and was sent {age} ago with the subject line "{e.subject}." You are auto-responding on behalf of {USER_FULLNAME}, who is described by the following short bio (strictly for your context -- do not recite this in the response): "{USER_BIO}." {USER_FULLNAME} is unable to respond himself, because {AUTORESPONSE_CONTEXT}. Everything from here to ~~//END//~~ is the email body.
{e.body}
~~//END//~~
Keep your auto-response {response_style} and to the point, but do aim to make it responsive specifically to the sender's inquiry.
'''

    try:
        return await llm.query_ollama(prompt, AUTORESPONSE_SYS, 400)
    except Exception as err:
        ERR(f"Error generating auto-response: {str(err)}")
        return "Thank you for your email. Unfortunately, an error occurred while generating the auto-response. We apologize for any inconvenience."
|
||||
|
||||
async def send_auto_response(to_email, subject, body):
    """Send a plain-text reply over SMTP-over-SSL using the ``IMAP`` credentials.

    Args:
        to_email: Recipient address.
        subject: Subject line of the reply.
        body: Plain-text body of the reply.

    Raises:
        Exception: Any failure is logged (once by the sending step and once
        by the outer handler) and then re-raised.
    """
    try:
        outgoing = MIMEMultipart()
        outgoing['From'] = IMAP.email  # smtp_username
        outgoing['To'] = to_email
        outgoing['Subject'] = subject
        outgoing.attach(MIMEText(body, 'plain'))

        try:
            DEBUG(f"Initiating attempt to send auto-response via SMTP at {IMAP.host}:{IMAP.smtp_port}...")
            # NOTE(review): this context performs no certificate verification;
            # confirm that is intended for the configured mail host.
            context = ssl._create_unverified_context()

            with SMTP_SSL(IMAP.host, IMAP.smtp_port, context=context) as server:
                server.login(IMAP.email, IMAP.password)
                DEBUG(f"Successfully logged in to {IMAP.host} at {IMAP.smtp_port} as {IMAP.email}. Attempting to send email now.")
                server.send_message(outgoing)

            INFO(f"Auto-response sent to {to_email} concerning {subject}")

        except Exception as send_error:
            ERR(f"Failed to send auto-response email to {to_email}: {send_error}")
            raise send_error

    except Exception as error:
        ERR(f"Error in preparing/sending auto-response: {error}")
        raise error
|
||||
|
||||
def should_auto_respond(email: EmailModel) -> bool:
    """Decide whether *email* qualifies for an auto-response.

    An entry containing ``@`` matches when it is a substring of the sender
    (case-sensitive); any other entry matches when it occurs in the subject
    or body (case-insensitive). With a non-empty whitelist the email must
    match at least one whitelist entry. In every case a blacklist match
    vetoes the response.
    """
    def hits_any(entries) -> bool:
        for entry in entries:
            if '@' in entry:
                if entry in email.sender:
                    return True
            elif entry.lower() in email.subject.lower() or entry.lower() in email.body.lower():
                return True
        return False

    if AUTORESPONSE_WHITELIST and not hits_any(AUTORESPONSE_WHITELIST):
        return False
    if AUTORESPONSE_BLACKLIST and hits_any(AUTORESPONSE_BLACKLIST):
        return False
    return True
|
||||
|
||||
# Strong references to background tasks: the event loop only keeps weak
# references, so an unreferenced task may be garbage-collected mid-run.
_EMAIL_BACKGROUND_TASKS = set()


@email.on_event("startup")
async def startup_event():
    """Start the inbox polling loop as a background task on application startup."""
    task = asyncio.create_task(process_unread_emails())
    _EMAIL_BACKGROUND_TASKS.add(task)
    task.add_done_callback(_EMAIL_BACKGROUND_TASKS.discard)
|
66
sijapi/routers/health.py
Normal file
66
sijapi/routers/health.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
'''
|
||||
Health check module. /health returns `'status': 'ok'`, /id returns TS_ID, /routers responds with a list of the active routers, /ip responds with the device's local IP, /ts_ip responds with its tailnet IP, and /wan_ip responds with WAN IP.
|
||||
Depends on:
|
||||
TS_ID, ROUTERS, LOGGER, SUBNET_BROADCAST
|
||||
'''
|
||||
import os
|
||||
import httpx
|
||||
import socket
|
||||
from fastapi import APIRouter
|
||||
from tailscale import Tailscale
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import TS_ID, ROUTERS, SUBNET_BROADCAST
|
||||
|
||||
# APIRouter instance for the health endpoints; carries all three access tags.
health = APIRouter(tags=["public", "trusted", "private"])
|
||||
|
||||
@health.get("/health")
def get_health():
    """Liveness probe: always answers with ``{"status": "ok"}``."""
    status_payload = {"status": "ok"}
    return status_payload
|
||||
|
||||
@health.get("/id")
def get_health() -> str:
    """Return this instance's ``TS_ID``.

    NOTE(review): this handler reuses the name ``get_health`` of the
    ``/health`` handler, so the module-level name refers to this function;
    consider renaming once callers of the name are confirmed.
    """
    instance_id = TS_ID
    return instance_id
|
||||
|
||||
@health.get("/routers")
def get_routers() -> str:
    """Return the entries of ``ROUTERS`` as one comma-separated string."""
    return ", ".join(ROUTERS)
|
||||
|
||||
@health.get("/ip")
def get_local_ip():
    """Get the server's local IP address.

    A datagram socket is connected towards ``SUBNET_BROADCAST`` and the
    socket's own address is reported; ``'127.0.0.1'`` is returned when that
    fails for any reason.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        probe.connect((f'{SUBNET_BROADCAST}', 1))
        return probe.getsockname()[0]
    except Exception:
        return '127.0.0.1'
    finally:
        probe.close()
|
||||
|
||||
@health.get("/wan_ip")
async def get_wan_ip():
    """Get the WAN IP address using Mullvad's API.

    Returns the ``ip`` field of the JSON reply, or ``"Unavailable"`` when
    the field is missing or the request fails (the failure is logged).
    """
    async with httpx.AsyncClient() as http:
        try:
            reply = await http.get('https://am.i.mullvad.net/json')
            reply.raise_for_status()
            return reply.json().get('ip', 'Unavailable')
        except Exception as err:
            WARN(f"Error fetching WAN IP: {err}")
            return "Unavailable"
|
||||
|
||||
@health.get("/ts_ip")
async def get_tailscale_ip():
    """Get the Tailscale IP address.

    Reads ``TAILNET`` and ``TAILSCALE_API_KEY`` from the environment, lists
    the tailnet's devices, and returns the first address of the first
    device, or ``"No devices found"`` when the listing is empty.

    NOTE(review): this indexes the result as a list of dicts; confirm that
    matches what the installed ``tailscale`` client returns from
    ``devices()``.
    """
    tailnet = os.getenv("TAILNET")
    api_key = os.getenv("TAILSCALE_API_KEY")
    async with Tailscale(tailnet=tailnet, api_key=api_key) as tailscale:
        devices = await tailscale.devices()
        if not devices:
            return "No devices found"
        # Assuming you want the IP of the first device in the list
        first_device = devices[0]
        return first_device['addresses'][0]
|
358
sijapi/routers/hooks.py
Normal file
358
sijapi/routers/hooks.py
Normal file
|
@ -0,0 +1,358 @@
|
|||
'''
|
||||
Webhook module for specific use cases.
|
||||
Depends on:
|
||||
LOGGER, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, MAC_UN, MAC_PW, MAC_ID, TS_TAILNET
|
||||
'''
|
||||
from fastapi import APIRouter, Request, BackgroundTasks, HTTPException, status
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
import httpx
|
||||
import json
|
||||
from pathlib import Path
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
import os, io
|
||||
from PyPDF2 import PdfReader
|
||||
import aiohttp
|
||||
import paramiko
|
||||
import time
|
||||
import subprocess
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional
|
||||
import requests
|
||||
import base64
|
||||
from hashlib import sha256
|
||||
# from O365 import Account, FileSystemTokenBackend
|
||||
from typing import List
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, MAC_UN, MAC_PW, MAC_ID, TS_TAILNET
|
||||
|
||||
# APIRouter instance for the webhook endpoints defined in this module.
hooks = APIRouter()

# Case lookup table, loaded once at import time from the JSON file at
# CASETABLE_PATH; cl_case_details() reads it by docket id.
with open(CASETABLE_PATH, 'r') as file:
    CASETABLE = json.load(file)
|
||||
|
||||
class WidgetUpdate(BaseModel):
    """Optional fields of a widget update; each set field becomes one ``widget`` argument."""

    text: Optional[str] = None
    progress: Optional[str] = None
    icon: Optional[str] = None
    color: Optional[str] = None
    url: Optional[str] = None
    shortcut: Optional[str] = None
    # Sent to the widget as an additional --text argument.
    graph: Optional[str] = None
|
||||
|
||||
|
||||
@hooks.get("/health_check")
def hook_health():
    """Run the ShellFish health check and return its result.

    Returns:
        dict: The ``{"output": ..., "graph": ...}`` mapping produced by
        ``shellfish_health_check`` (previously computed but discarded).
    """
    return shellfish_health_check()
|
||||
|
||||
@hooks.post("/update_widget")
def hook_widget_update(update: WidgetUpdate):
    """Apply *update* to the ShellFish widget and return the command output.

    Returns:
        dict: The ``{"output": ...}`` mapping produced by
        ``shellfish_update_widget`` (previously computed but discarded).
    """
    return shellfish_update_widget(update)
|
||||
|
||||
@hooks.get("/alert")
async def hook_alert(request: Request):
    """Forward the ``alert`` query parameter to ``notify``.

    Raises:
        HTTPException: 400 when the parameter is missing or empty.
    """
    alert = request.query_params.get('alert')
    if alert:
        return await notify(alert)
    raise HTTPException(status_code=400, detail='No alert provided.')
|
||||
|
||||
@hooks.post("/alert/cd")
async def hook_changedetection(webhook_data: dict):
    """Handle a change-detection webhook.

    When the message under ``body.message`` contains one of the watched
    words as a whitespace-separated token, the full payload is archived as
    JSON in ``ALERTS_DIR`` and the message is sent out through ``notify``.

    Returns:
        dict: ``{"status": "received"}`` in every non-error case.
    """
    body = webhook_data.get("body", {})
    message = body.get("message", "")

    if message and any(word in message.split() for word in ["SPI", "sierra", "pacific"]):
        filename = ALERTS_DIR / f"alert_{int(time.time())}.json"
        filename.write_text(json.dumps(webhook_data, indent=4))

        # notify() is a coroutine function: without await it is never executed.
        await notify(message)

    return {"status": "received"}
|
||||
|
||||
|
||||
@hooks.post("/cl/search")
async def hook_cl_search(request: Request, background_tasks: BackgroundTasks):
    """Receive a CourtListener search webhook.

    The payload is written to ``LOGS_DIR`` and every entry of its
    ``results`` is queued for processing by ``cl_search_process_result``.
    """
    client_ip = request.client.host
    DEBUG(f"Received request from IP: {client_ip}")
    data = await request.json()
    payload = data['payload']
    results = payload['results']

    # Save the payload data
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    log_path = LOGS_DIR / f"{stamp}-{client_ip}_search.json"
    with open(log_path, 'w') as log_file:
        json.dump(payload, log_file, indent=2)

    for entry in results:
        background_tasks.add_task(cl_search_process_result, entry)
    return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
|
||||
|
||||
@hooks.post("/cl/docket")
async def hook_cl_docket(request: Request, background_tasks: BackgroundTasks):
    """Receive a CourtListener docket webhook and hand it to ``cl_docket``.

    ``cl_docket`` requires the request's ``BackgroundTasks`` to schedule the
    per-result downloads, so it is injected here and passed through; the
    response built by ``cl_docket`` is returned to the caller.
    """
    client_ip = request.client.host
    DEBUG(f"Received request from IP: {client_ip}")
    data = await request.json()
    return await cl_docket(data, client_ip, background_tasks)
|
||||
|
||||
async def notify(alert: str):
    """Send *alert* through ShellFish and as a desktop notification.

    The desktop notification is shown locally when this instance's
    ``TS_ID`` equals ``MAC_ID``, otherwise on the host
    ``{MAC_ID}.{TS_TAILNET}.net`` over SSH.

    Returns:
        dict: ``{"message": alert}`` on success.

    Raises:
        HTTPException: 500 when any delivery step raises.
    """
    try:
        await notify_shellfish(alert)

        runs_on_mac = TS_ID == MAC_ID
        if runs_on_mac:
            await notify_local(alert)
        else:
            await notify_remote(f"{MAC_ID}.{TS_TAILNET}.net", alert, MAC_UN, MAC_PW)

    except Exception as failure:
        raise HTTPException(status_code=500, detail=f"Failed to send alert: {str(failure)}")

    return {"message": alert}
|
||||
|
||||
async def notify_local(message: str):
    """Show *message* as a desktop notification on this machine via ``osascript``.

    The message can originate from an HTTP request, so it is never passed
    through a shell: it is escaped for an AppleScript string literal and the
    script is handed to ``osascript`` as a single argument. Delivery is best
    effort; a missing ``osascript`` binary is logged rather than raised.
    """
    escaped = message.replace('\\', '\\\\').replace('"', '\\"')
    script = f'display notification "{escaped}" with title "Notification Title"'
    try:
        await asyncio.to_thread(subprocess.run, ["osascript", "-e", script], check=False)
    except OSError as e:
        WARN(f"Could not run osascript for local notification: {e}")
|
||||
|
||||
|
||||
# Asynchronous remote notification using paramiko SSH
async def notify_remote(host: str, message: str, username: str = None, password: str = None, key_filename: str = None):
    """Show *message* as a desktop notification on *host* by running ``osascript`` over SSH.

    Args:
        host: Host name to connect to.
        message: Notification text.
        username: SSH user name.
        password: SSH password; used only when *key_filename* is not given.
        key_filename: Private key file; takes precedence over *password*.

    The message can originate from an HTTP request and the remote command is
    interpreted by a shell, so the AppleScript is escaped as a string
    literal and then shell-quoted as a whole. The SSH client is closed even
    when connecting or executing fails.
    """
    import shlex  # local import: not part of this module's top-level imports

    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    connect_kwargs = {'hostname': host, 'username': username}
    if key_filename:
        connect_kwargs['key_filename'] = key_filename
    else:
        connect_kwargs['password'] = password

    escaped = message.replace('\\', '\\\\').replace('"', '\\"')
    script = f'display notification "{escaped}" with title "Notification Title"'
    command = f"osascript -e {shlex.quote(script)}"

    try:
        await asyncio.to_thread(ssh.connect, **connect_kwargs)
        await asyncio.to_thread(ssh.exec_command, command)
    finally:
        ssh.close()
|
||||
|
||||
|
||||
|
||||
async def notify_shellfish(alert: str):
    """Encrypt *alert* with ``openssl`` and post it to the Secure ShellFish push endpoint.

    Raises:
        Exception: When ``openssl`` exits non-zero or the endpoint answers
        with a status other than 200.
    """
    # NOTE(review): key, user and iv are hard-coded credentials; consider
    # moving them into configuration.
    key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b"
    user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm"
    iv = "ab5bbeb426015da7eedcee8bee3dffb7"

    plain = "Secure ShellFish Notify 2.0\n" + base64.b64encode(alert.encode()).decode() + "\n"

    openssl_command = [
        "openssl", "enc", "-aes-256-cbc", "-base64", "-K", key, "-iv", iv
    ]

    process = await asyncio.to_thread(subprocess.Popen, openssl_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = await asyncio.to_thread(process.communicate, plain.encode())

    if process.returncode != 0:
        raise Exception(f"OpenSSL encryption failed: {stderr.decode()}")

    base64_encoded = stdout.decode().strip()

    url = f"https://secureshellfish.app/push/?user={user}&mutable"
    headers = {"Content-Type": "text/plain"}
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, data=base64_encoded) as response:
            if response.status != 200:
                # aiohttp responses expose .status; .status_code does not exist
                # and would turn this error into an AttributeError.
                raise Exception(f"Failed to send notification: {response.status}")
|
||||
|
||||
## SHELLFISH ##
|
||||
def shellfish_health_check():
    """Probe the known ``/health`` endpoints and publish the outcome to the widget.

    Each address is requested once; a 200 answer counts as up, any other
    status is reported with its code, and a request error (including a
    timeout) counts as down.

    Returns:
        dict: ``{"output": <widget command output>, "graph": <text graph>}``
        where the graph has one ``|`` per reachable endpoint followed by one
        ``.`` per remaining endpoint.
    """
    addresses = [
        "https://api.sij.ai/health",
        "http://100.64.64.20:4444/health",
        "http://100.64.64.30:4444/health",
        "http://100.64.64.11:4444/health",
        "http://100.64.64.15:4444/health"
    ]
    # Without a timeout a single unresponsive host blocks the check forever.
    timeout_seconds = 10

    results = []
    up_count = 0
    for address in addresses:
        try:
            response = requests.get(address, timeout=timeout_seconds)
            if response.status_code == 200:
                results.append(f"{address} is up")
                up_count += 1
            else:
                results.append(f"{address} returned status code {response.status_code}")
        except requests.exceptions.RequestException:
            results.append(f"{address} is down")

    # Generate a simple text-based graph
    graph = '|' * up_count + '.' * (len(addresses) - up_count)
    text_update = "\n".join(results)

    widget_command = ["widget", "--text", text_update, "--text", f"Graph: {graph}", "--icon", "network"]
    output = shellfish_run_widget_command(widget_command)
    return {"output": output, "graph": graph}
|
||||
|
||||
|
||||
def shellfish_update_widget(update: WidgetUpdate):
    """Translate *update* into ``widget`` arguments and run the command.

    Every non-empty field contributes one flag/value pair, in a fixed
    order; ``graph`` is sent as a second ``--text`` argument.

    Returns:
        dict: ``{"output": <stdout of the widget command>}``.
    """
    flag_values = (
        ("--text", update.text),
        ("--progress", update.progress),
        ("--icon", update.icon),
        ("--color", update.color),
        ("--url", update.url),
        ("--shortcut", update.shortcut),
        ("--text", update.graph),
    )

    widget_command = ["widget"]
    for flag, value in flag_values:
        if value:
            widget_command.extend([flag, value])

    return {"output": shellfish_run_widget_command(widget_command)}
|
||||
|
||||
|
||||
def shellfish_run_widget_command(args: List[str]):
    """Run the command *args* and return its standard output.

    The argument list is executed directly. Combining a list with
    ``shell=True`` would, on POSIX, run only the first element as the
    command and hand the remaining elements to the shell itself, so every
    widget option would be dropped.

    Args:
        args: Program name followed by its arguments.

    Returns:
        str: Captured standard output of the command.

    Raises:
        HTTPException: 500 when the program cannot be started or exits with
        a non-zero status (detail carries the error text).
    """
    try:
        result = subprocess.run(args, capture_output=True, text=True)
    except FileNotFoundError as e:
        raise HTTPException(status_code=500, detail=str(e))
    if result.returncode != 0:
        raise HTTPException(status_code=500, detail=result.stderr)
    return result.stdout
|
||||
|
||||
|
||||
### COURTLISTENER FUNCTIONS ###
|
||||
async def cl_docket(data, client_ip, background_tasks: BackgroundTasks):
    """Log a CourtListener docket payload and queue each result for download.

    Args:
        data: Parsed webhook body; must contain ``payload`` with ``results``.
        client_ip: Address of the sender, used in the log file name.
        background_tasks: Task queue that runs ``cl_docket_process`` per result.

    Returns:
        JSONResponse: A 200 response with ``{"message": "Received"}``.
    """
    payload = data['payload']
    results = payload['results']

    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    log_path = LOGS_DIR / f"{stamp}-{client_ip}_docket.json"
    with open(log_path, 'w') as log_file:
        json.dump(payload, log_file, indent=2)

    for entry in results:
        background_tasks.add_task(cl_docket_process, entry)
    return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
|
||||
|
||||
async def cl_docket_process(result):
    """Process one docket result inside a fresh ``httpx.AsyncClient`` context."""
    async with httpx.AsyncClient() as client:
        await cl_docket_process_result(result, client)
|
||||
|
||||
async def cl_docket_process_result(result, session):
    """Download every RECAP document attached to one docket result.

    The docket id is mapped to a case code and short name through
    ``cl_case_details``, the docket is looked up on CourtListener (used for
    logging only), and each document with a usable file path is downloaded
    into ``COURTLISTENER_DOCKETS_DIR/<short name>/Docket``.

    Args:
        result: One entry of the webhook's ``results`` list.
        session: Client object forwarded unchanged to ``cl_download_file``.
    """
    docket = str(result.get('docket'))
    case_code, case_shortname = cl_case_details(docket)
    date_filed = result.get('date_filed', 'No Date Filed')

    try:
        date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d')
    except (ValueError, TypeError):
        # TypeError covers a date_filed that is present but null.
        date_filed_formatted = 'NoDateFiled'

    # Fetching court docket information from the API
    url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
    headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
    court_docket = 'NoCourtDocket'
    case_name = 'NoCaseName'
    # A dedicated name keeps the caller-supplied `session` from being shadowed.
    async with aiohttp.ClientSession() as api_session:
        async with api_session.get(url, headers=headers) as response:
            if response.status == 200:
                DEBUG(f"Fetching CourtListener docket information for {docket}...")
                data = await response.json()
                matches = data.get('results') or []
                if matches:
                    court_docket = matches[0]['docket_number_core']
                    court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}"  # Formatting the docket number
                    case_name = matches[0]['case_name']
                    DEBUG(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
                else:
                    # The lookup only feeds the log; an empty result must not abort the downloads.
                    DEBUG("Failed to fetch data from CourtListener API.")
            else:
                DEBUG("Failed to fetch data from CourtListener API.")

    for document in result.get('recap_documents', []):
        filepath_ia = document.get('filepath_ia')
        filepath_local = document.get('filepath_local')

        if filepath_ia:
            file_url = filepath_ia
            DEBUG(f"Found IA file at {file_url}.")
        elif filepath_local:
            file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
            DEBUG(f"Found local file at {file_url}.")
        else:
            DEBUG(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
            continue

        document_number = document.get('document_number', 'NoDocumentNumber')
        # `or` also covers a description that is present but null or empty.
        description = (document.get('description') or 'NoDescription').replace(" ", "_").replace("/", "_")
        description = description[:50]  # Truncate description
        file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
        target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name
        target_path.parent.mkdir(parents=True, exist_ok=True)
        await cl_download_file(file_url, target_path, session)
        DEBUG(f"Downloaded {file_name} to {target_path}")
|
||||
|
||||
def cl_case_details(docket):
    """Look up the case code and short name for *docket* in ``CASETABLE``.

    Returns:
        tuple: ``(code, shortname)``; ``("000", "UNKNOWN")`` when the docket
        is not in the table.
    """
    fallback = {"code": "000", "shortname": "UNKNOWN"}
    entry = CASETABLE.get(str(docket), fallback)
    return entry.get("code"), entry.get("shortname")
|
||||
|
||||
async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
    """Download the PDF at *url* to *path*, validating it before writing.

    The download is skipped (and logged) when the server answers 403, when
    the media type is not ``application/pdf``, or when the content cannot
    be opened as a PDF. Every other error is logged and swallowed.

    Args:
        url: Location of the file.
        path: Destination file; parent directories are created as needed.
        session: Accepted for interface compatibility but not used; the
            function always opens its own aiohttp session.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
    }
    async with aiohttp.ClientSession() as http:
        DEBUG(f"Attempting to download {url} to {path}.")
        try:
            async with http.get(url, headers=headers, allow_redirects=True) as response:
                if response.status == 403:
                    ERR(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
                    return
                response.raise_for_status()

                # Check if the response content type is a PDF. Only the media
                # type is compared, so parameters such as "; charset=binary"
                # or different letter case do not reject a valid PDF.
                content_type = response.headers.get('Content-Type')
                media_type = (content_type or '').split(';', 1)[0].strip().lower()
                if media_type != 'application/pdf':
                    ERR(f"Invalid content type: {content_type}. Skipping download.")
                    return

                # Create an in-memory buffer to store the downloaded content
                buffer = io.BytesIO()
                async for chunk in response.content.iter_chunked(1024):
                    buffer.write(chunk)

                # Reset the buffer position to the beginning
                buffer.seek(0)

                # Validate the downloaded PDF content
                try:
                    PdfReader(buffer)
                except Exception as e:
                    ERR(f"Invalid PDF content: {str(e)}. Skipping download.")
                    return

                # If the PDF is valid, write the content to the file on disk
                path.parent.mkdir(parents=True, exist_ok=True)
                with path.open('wb') as file:
                    file.write(buffer.getvalue())

        except Exception as e:
            ERR(f"Error downloading file: {str(e)}")
|
||||
|
||||
|
||||
async def cl_search_process_result(result):
    """Download the file referenced by one CourtListener search result.

    The file is stored under
    ``COURTLISTENER_SEARCH_DIR/<court_id>/<case name>/<file name>``, where
    the case name is ``caseNameShort`` when set and ``caseName`` otherwise,
    and the file name is the last path segment of ``download_url``.
    """
    async with httpx.AsyncClient() as client:
        download_url = result.get('download_url')
        court_id = result.get('court_id')
        case_name_short = result.get('caseNameShort')
        case_name = result.get('caseName')
        DEBUG(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")

        case_folder = case_name_short or case_name
        file_name = download_url.split('/')[-1]

        destination = Path(COURTLISTENER_SEARCH_DIR) / court_id / case_folder / file_name
        destination.parent.mkdir(parents=True, exist_ok=True)

        await cl_download_file(download_url, destination, client)
        DEBUG(f"Downloaded {file_name} to {destination}")
|
973
sijapi/routers/ig.py
Normal file
973
sijapi/routers/ig.py
Normal file
|
@ -0,0 +1,973 @@
|
|||
'''
|
||||
IN DEVELOPMENT: Instagram AI bot module.
|
||||
'''
|
||||
from fastapi import APIRouter, UploadFile
|
||||
import os
|
||||
import io
|
||||
import copy
|
||||
import re
|
||||
import jwt
|
||||
import json
|
||||
from tqdm import tqdm
|
||||
import pyotp
|
||||
import time
|
||||
import pytz
|
||||
import requests
|
||||
import tempfile
|
||||
import random
|
||||
import subprocess
|
||||
import urllib.request
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter
|
||||
from time import sleep
|
||||
from datetime import timedelta, datetime as date
|
||||
from PIL import Image
|
||||
from pydantic import BaseModel
|
||||
from typing import Dict, List, Optional
|
||||
import instagrapi
|
||||
from instagrapi import Client as igClient
|
||||
from instagrapi.types import UserShort
|
||||
from urllib.parse import urlparse
|
||||
from instagrapi.exceptions import LoginRequired as ClientLoginRequiredError
|
||||
import json
|
||||
from ollama import Client as oLlama
|
||||
from sd import sd
|
||||
from dotenv import load_dotenv
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, COMFYUI_DIR
|
||||
|
||||
import io
|
||||
from io import BytesIO
|
||||
import base64
|
||||
|
||||
# APIRouter instance for the Instagram module.
ig = APIRouter()
|
||||
|
||||
class IG_Request(BaseModel):
    """Options accepted by an Instagram bot request; every field is optional."""

    # Upload a particular file to Instagram.
    file: Optional[UploadFile] = None
    # Profile account to use (the short names defined per folder and in the config file).
    profile: Optional[str] = None
    # Overrides all other settings to ensure images are generated locally and stay local.
    local_only: Optional[bool] = False
    # OpenAI API key; if included, it is relied on for DALL-E, GPT-4 and GPT-4-Vision unless otherwise overridden.
    openai: Optional[str] = None
    # If a valid OpenAI model name is provided it is used; otherwise an Ollama model of that name is tried (if one exists).
    llm: Optional[str] = "llama3"
    # Set to GPT-4-Vision to use the OpenAI image-to-text model; otherwise a vision-capable Ollama model is tried.
    i2t: Optional[str] = "llava"
    # Set to DALL-E to use the OpenAI model, or use it to override the StableDiffusion workflow that is otherwise selected.
    # Leave blank to use the defaults from the config file.
    t2i: Optional[str] = None
    # If given a value, it is used as the category of post; if given no value, all categories are used unless ig_comment_only is enabled.
    # NOTE(review): the default is True although the annotation is Optional[str]; confirm the intended type.
    ig_post: Optional[str] = True
    # If given a value, it is used as the category of comment; if given no value, all categories are used unless ig_post_only is enabled.
    ig_comment: Optional[str] = None
    # Target a particular user for comments.
    ig_comment_user: Optional[str] = None
    # Target a particular Instagram URL for comments.
    ig_comment_url: Optional[str] = None
    # Enable posting to Ghost.
    ghost_post: Optional[bool] = True
    # Average duration of short intervals (a few seconds is adequate; this simulates doomscrolling latency).
    sleep_short: Optional[int] = 5
    # Average duration of long intervals (at least about a minute; simulates the time it takes to write a comment or prepare a post).
    sleep_long: Optional[int] = 180
|
||||
|
||||
# Settings read from the environment, with defaults where one is given.
IG_PROFILE = os.getenv("IG_PROFILE")
IG_SHORT_SLEEP = int(os.getenv("IG_SHORT_SLEEP", 5))
IG_LONG_SLEEP = int(os.getenv("IG_LONG_SLEEP", 180))
IG_POST_GHOST = os.getenv("IG_POST_GHOST")
IG_VISION_LLM = os.getenv("IG_VISION_LLM")
IG_PROMPT_LLM = os.getenv("IG_PROMPT_LLM")
IG_IMG_GEN = os.getenv("IG_IMG_GEN", "ComfyUI")
IG_OUTPUT_PLATFORMS = os.getenv("IG_OUTPUT_PLATFORMS", "ig,ghost,obsidian").split(',')
SD_WORKFLOWS_DIR = os.path.join(COMFYUI_DIR, 'workflows')
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output'
# NOTE(review): BASE_DIR and PROFILE are not defined or imported in the
# visible part of this module (IG_PROFILE above may be what PROFILE was
# meant to be) - confirm before enabling this router.
IG_PROFILES_DIR = os.path.join(BASE_DIR, 'profiles')
IG_PROFILE_DIR = os.path.join(IG_PROFILES_DIR, PROFILE)
IG_IMAGES_DIR = os.path.join(IG_PROFILE_DIR, 'images')
IG_PROFILE_CONFIG_PATH = os.path.join(IG_PROFILE_DIR, f'config.json')
IG_VIEWED_IMAGES_DIR = os.path.join(IG_PROFILE_DIR, 'downloads')

# Per-profile configuration, loaded once at import time.
with open(IG_PROFILE_CONFIG_PATH, 'r') as config_file:
    PROFILE_CONFIG = json.load(config_file)

if not os.path.exists(IG_IMAGES_DIR):
    os.makedirs(IG_IMAGES_DIR )

OPENAI_API_KEY=PROFILE_CONFIG.get("openai_key")


###################
### VALIDATION ###
##################

# NOTE(review): `args` is not defined in the visible part of this module;
# it looks like a leftover from a command-line version - confirm.
if args.profile and args.posttype and not args.custompost and not args.posttype in PROFILE_CONFIG["posts"]:
    print ("ERROR: NO SUCH POST TYPE IS AVAILABLE FOR THIS PROFILE.")

if args.profile and args.commenttype and not args.commenttype in PROFILE_CONFIG["comments"]:
    print ("ERROR: NO SUCH COMMENT TYPE IS AVAILABLE FOR THIS PROFILE.")


####################
### CLIENT SETUP ###
####################

# Shared instagrapi client used by the helper functions in this module.
cl = igClient(request_timeout=1)

# NOTE(review): OpenAI is not imported in the visible part of this module - confirm.
IMG_GEN = OpenAI(api_key=OPENAI_API_KEY)
IMG_MODEL = "dall-e-3"

COMFYUI_URL = "http://localhost:8188"
CLIENT_ID = str(uuid.uuid4())


###############################
### INSTAGRAM & GHOST SETUP ###
###############################
IG_USERNAME = PROFILE_CONFIG.get("ig_name")
IG_PASSWORD = PROFILE_CONFIG.get("ig_pass")
IG_SECRET_KEY = PROFILE_CONFIG.get("ig_2fa_secret")
IG_SESSION_PATH = os.path.join(IG_PROFILE_DIR, f'credentials.json')

GHOST_API_URL=PROFILE_CONFIG.get("ghost_admin_url")
GHOST_API_KEY=PROFILE_CONFIG.get("ghost_admin_api_key")
GHOST_CONTENT_KEY=PROFILE_CONFIG.get("ghost_content_key")

########################
### LLM PROMPT SETUP ###
########################
IMG_PROMPT_SYS = PROFILE_CONFIG.get("img_prompt_sys")
IMG_DESCRIPTION_SYS = PROFILE_CONFIG.get("img_description_sys")
COMMENT_PROMPT_SYS = PROFILE_CONFIG.get("img_comment_sys")
HASHTAGS = PROFILE_CONFIG.get("preferred_hashtags", [])
IMAGE_URL = args.image_url
rollover_time = 1702605780
COMPLETED_MEDIA_LOG = os.path.join(IG_PROFILE_DIR, f'completed-media.txt')
TOTP = pyotp.TOTP(IG_SECRET_KEY)
# Pause lengths (passed to sleep()) used by the helper functions below.
SHORT = args.shortsleep
LONG = args.longsleep
|
||||
|
||||
|
||||
def follow_by_username(username) -> bool:
    """
    Follow the account with the given username.

    Resolves the username to a user id, pauses briefly, then returns the
    result of the client's follow request.
    """
    target_id = cl.user_id_from_username(username)
    sleep(SHORT)
    followed = cl.user_follow(target_id)
    return followed
|
||||
|
||||
def unfollow_by_username(username) -> bool:
    """
    Unfollow the account with the given username.

    Resolves the username to a user id, pauses briefly, then returns the
    result of the client's unfollow request.
    """
    target_id = cl.user_id_from_username(username)
    sleep(SHORT)
    unfollowed = cl.user_unfollow(target_id)
    return unfollowed
|
||||
|
||||
def get_poster_of_post(shortcode):
    """
    Return the username of the account that posted the media with this shortcode.
    """
    return cl.media_info_by_shortcode(shortcode).user.username
|
||||
|
||||
|
||||
def get_followers(amount: int = 0) -> Dict[int, UserShort]:
    """
    Fetch the logged-in account's followers.

    `amount` is passed straight to the client; the client's mapping of
    user id to user object is returned unchanged.
    """
    own_id = cl.user_id
    return cl.user_followers(own_id, amount=amount)
|
||||
|
||||
|
||||
def get_followers_usernames(amount: int = 0) -> List[str]:
    """
    Return the usernames of the logged-in account's followers.

    Fetches the follower mapping from the client, pauses briefly, then
    collects each follower's username in mapping order.
    """
    follower_map = cl.user_followers(cl.user_id, amount=amount)
    sleep(SHORT)
    usernames = []
    for follower in follower_map.values():
        usernames.append(follower.username)
    return usernames
|
||||
|
||||
def get_following(amount: int = 0) -> Dict[int, UserShort]:
    """
    Fetch the accounts the logged-in account follows.

    Pauses briefly before the request; returns the client's mapping of
    user id to user object.
    """
    sleep(SHORT)
    own_id = cl.user_id
    return cl.user_following(own_id, amount=amount)
|
||||
|
||||
|
||||
def get_user_media(username, amount=30):
    """
    Fetch recent media for a username and keep only items whose media_type is 1.

    NOTE(review): media_type 1 presumably denotes a single photo in the
    client library - confirm. A short pause is taken for every fetched item.
    """
    DEBUG(f"Fetching recent media for {username}...")
    owner_id = cl.user_id_from_username(username)
    photos = []
    for item in cl.user_medias(owner_id, amount):
        sleep(SHORT)
        if item.media_type != 1:
            continue
        photos.append(item)
    return photos
|
||||
|
||||
|
||||
def get_user_image_urls(username, amount=30) -> List[str]:
    """
    Fetch recent media for a username and return their thumbnail URLs.

    Only items with media_type 1 and a non-empty thumbnail_url contribute.
    A short pause is taken for every fetched item.
    """
    DEBUG(f"Fetching recent media URLs for {username}...")
    owner_id = cl.user_id_from_username(username)
    thumbnails = []
    for item in cl.user_medias(owner_id, amount):
        sleep(SHORT)
        if item.media_type != 1 or not item.thumbnail_url:
            continue
        thumbnails.append(item.thumbnail_url)
    return thumbnails
|
||||
|
||||
def is_valid_url(url):
    """
    Return True when `url` parses with both a scheme and a network location.

    Any exception raised while parsing is treated as "not valid".
    """
    try:
        parts = urlparse(url)
        has_scheme = bool(parts.scheme)
        has_host = bool(parts.netloc)
    except Exception:
        return False
    return has_scheme and has_host
|
||||
|
||||
def get_random_follower():
    """
    Return the username of one randomly chosen follower.

    Uses this module's get_followers_usernames() helper; the previous code
    looked the helper up on the Instagram client, where it is not defined.

    Raises:
        IndexError: if the account has no followers (from random.choice).
    """
    followers = get_followers_usernames()
    sleep(SHORT)
    return random.choice(followers)
|
||||
|
||||
|
||||
def get_medias_by_hashtag(hashtag: str, days_ago_max:int = 14, ht_type:str = None, amount:int = args.count, max_attempts: int = 5):
    """
    Fetch media for one hashtag until at least `amount` items survive filtering.

    Args:
        hashtag: Hashtag name to query.
        days_ago_max: Maximum media age in days, forwarded to filter_medias().
        ht_type: "top" or "recent"; falls back to args.commentmode when empty.
        amount: Number of filtered media wanted; each request asks for amount*10.
        max_attempts: Upper bound on fetch rounds, so a hashtag that can never
            supply enough media does not poll forever.

    Returns:
        The filtered media list. It may be longer than `amount`, or shorter
        when the attempts were exhausted.

    Raises:
        ValueError: if the resolved ht_type is neither "top" nor "recent"
            (previously this case looped forever without fetching anything).
    """
    if not ht_type:
        ht_type = args.commentmode
    fetchers = {"top": cl.hashtag_medias_top, "recent": cl.hashtag_medias_recent}
    if ht_type not in fetchers:
        raise ValueError(f"Unsupported hashtag media type: {ht_type!r} (expected 'top' or 'recent')")
    fetch = fetchers[ht_type]

    DEBUG(f"Fetching {ht_type} media for hashtag: {hashtag}")
    # Keyed by pk so repeated rounds do not accumulate the same media twice.
    unique_medias = {}
    filtered_medias = []
    for _ in range(max(1, max_attempts)):
        sleep(SHORT)
        for media in fetch(name=hashtag, amount=amount*10):
            unique_medias.setdefault(media.pk, media)

        filtered_medias = filter_medias(list(unique_medias.values()), days_ago_max=days_ago_max)
        DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(filtered_medias)}")

        if len(filtered_medias) >= amount:
            DEBUG(f"Desired amount of {amount} filtered media reached.")
            break
    else:
        DEBUG(f"Stopping after {max_attempts} attempts for '#{hashtag}' with {len(filtered_medias)} filtered media.")

    return filtered_medias
|
||||
|
||||
def get_medias_from_all_hashtags(days_ago_max=14, ht_type:str = None, amount:int = args.count, max_attempts: int = 20):
    """
    Collect filtered media from randomly chosen preferred hashtags.

    Args:
        days_ago_max: Maximum media age in days, forwarded to filter_medias().
        ht_type: "top" or "recent"; falls back to args.commentmode when empty.
        amount: Number of media to return at most.
        max_attempts: Upper bound on fetch rounds, so the loop ends even when
            the hashtags cannot supply enough distinct media.

    Returns:
        Up to `amount` distinct media (fewer if the attempts were exhausted).

    Raises:
        ValueError: if the resolved ht_type is neither "top" nor "recent"
            (previously this case looped forever without fetching anything).
    """
    if not ht_type:
        ht_type = args.commentmode
    fetchers = {"top": cl.hashtag_medias_top, "recent": cl.hashtag_medias_recent}
    if ht_type not in fetchers:
        raise ValueError(f"Unsupported hashtag media type: {ht_type!r} (expected 'top' or 'recent')")
    fetch = fetchers[ht_type]

    DEBUG(f"Fetching {ht_type} media.")
    filtered_medias = []
    seen_pks = set()
    attempts = 0
    while len(filtered_medias) < amount and attempts < max_attempts:
        attempts += 1
        hashtag = random.choice(HASHTAGS)
        DEBUG(f"Using hashtag: {hashtag}")
        sleep(SHORT)
        fetched_medias = fetch(name=hashtag, amount=50)  # Fetch a large batch to filter from

        # The same hashtag can be drawn more than once; keep each media only once.
        current_filtered_medias = []
        for media in filter_medias(fetched_medias, days_ago_max=days_ago_max):
            if media.pk not in seen_pks:
                seen_pks.add(media.pk)
                current_filtered_medias.append(media)
        filtered_medias.extend(current_filtered_medias)
        DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(current_filtered_medias)}")

        if len(filtered_medias) >= amount:
            # Trim the list if we've collected more than needed
            filtered_medias = filtered_medias[:amount]
            DEBUG(f"Desired amount of {amount} filtered media reached.")
            break
        DEBUG(f"Total filtered media count so far: {len(filtered_medias)}")

    return filtered_medias
|
||||
|
||||
def filter_medias(
    medias: List,
    like_count_min=None,
    like_count_max=None,
    comment_count_min=None,
    comment_count_max=None,
    days_ago_max=None,
):
    """
    Return the media that satisfy every given bound and are not yet completed.

    Each bound is optional (None disables it). `days_ago_max` keeps only media
    whose `taken_at` is later than now (UTC) minus that many days. Media with
    no `taken_at` are dropped when an age bound is set. The completed-list
    check runs last, so it is only reached by media that pass every other bound.
    """
    # `is not None` (not truthiness) so days_ago_max=0 yields a real cutoff
    # instead of comparing taken_at against None.
    cutoff = date.now(pytz.utc) - timedelta(days=days_ago_max) if days_ago_max is not None else None
    return [
        media for media in medias
        if (
            (like_count_min is None or media.like_count >= like_count_min) and
            (like_count_max is None or media.like_count <= like_count_max) and
            (comment_count_min is None or media.comment_count >= comment_count_min) and
            (comment_count_max is None or media.comment_count <= comment_count_max) and
            (cutoff is None or (media.taken_at and media.taken_at > cutoff)) and not
            check_media_in_completed_lists(media)
        )
    ]
|
||||
|
||||
def add_media_to_completed_lists(media):
    """
    Record a media as handled by appending its pk, on its own line, to the completed-media log.
    """
    entry = f"{str(media.pk)}\n"
    with open(COMPLETED_MEDIA_LOG, 'a') as log:
        log.write(entry)
|
||||
|
||||
|
||||
def check_media_in_completed_lists(media):
    """
    Return True if the media's pk is already listed in the completed-media log.

    A log file that does not exist yet (nothing has been recorded) means
    nothing is completed, so False is returned instead of raising.
    """
    try:
        with open(COMPLETED_MEDIA_LOG, 'r') as file:
            completed_media = file.read().splitlines()
    except FileNotFoundError:
        return False
    return str(media.pk) in completed_media
|
||||
|
||||
|
||||
|
||||
def download_and_resize_image(url: str, download_path: str = None, max_dimension: int = 1200) -> str:
    """
    Download an image, shrink it to fit `max_dimension`, and save it.

    Args:
        url: Image URL; non-string values are converted with str().
        download_path: Target file. When empty, or when its directory does not
            exist, a temporary file named after the URL's extension is used.
        max_dimension: Largest allowed width/height; aspect ratio is preserved.

    Returns:
        The path the image was saved to, or None when the URL has no scheme or
        host, or when downloading or processing fails.
    """
    if not isinstance(url, str):
        url = str(url)
    parsed_url = urlparse(url)
    if not (url and parsed_url.scheme and parsed_url.netloc):
        return None

    created_temp = False
    if not download_path or not os.path.isdir(os.path.dirname(download_path)):
        _, temp_file_extension = os.path.splitext(parsed_url.path)
        if not temp_file_extension:
            temp_file_extension = ".jpg"  # Default extension if none is found
        # mkstemp creates the file atomically; mktemp only returns a name and is race-prone.
        fd, download_path = tempfile.mkstemp(suffix=temp_file_extension, prefix="download_")
        os.close(fd)
        created_temp = True

    try:
        os.makedirs(os.path.dirname(download_path), exist_ok=True)
        # Bounded wait so a stalled server cannot hang the run.
        with requests.get(url, timeout=30) as response:
            response.raise_for_status()  # Raises an HTTPError if the response was an error
            image = Image.open(BytesIO(response.content))

            # Pillow names the format "JPEG"; "jpg" is not a registered format key.
            image_format = image.format if image.format else "JPEG"

            # Resize the image, preserving aspect ratio
            if max(image.size) > max_dimension:
                image.thumbnail((max_dimension, max_dimension))

            image.save(download_path, image_format)
        return download_path
    except Exception as e:
        DEBUG(f"Error downloading or resizing image: {e}")
        # Do not leave an empty placeholder behind when we created the temp file.
        if created_temp and os.path.exists(download_path):
            os.remove(download_path)
        return None
|
||||
|
||||
|
||||
def comment_on_user_media(user: str, comment_type: str = "default", amount=5):
    """
    Generate and post a comment on each not-yet-handled recent media of a user.

    For every media returned by get_user_media(): skip it if already logged as
    completed, download its thumbnail, ask the vision model for a comment,
    post the comment when one was produced, and log the media as completed.
    """
    comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr']
    for media in get_user_media(user, amount):
        if check_media_in_completed_lists(media):
            DEBUG(f"Media already interacted with: {media.pk}")
            continue

        sleep(SHORT)
        if not (media.thumbnail_url and is_valid_url(media.thumbnail_url)):
            DEBUG(f"URL for {media.pk} disappeared it seems...")
            continue

        media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
        if media_path is None:
            DEBUG(f"We received a nonetype! {media_path}")
            continue

        encoded_media = encode_image_to_base64(media_path)
        if args.llava or not args.openai:
            comment_text = llava(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)
        else:
            comment_text = gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)

        if comment_text:
            cl.media_comment(media.pk, comment_text)
            DEBUG(f"Commented on media: {media.pk}")
        else:
            DEBUG(f"Failed to generate comment for media: {media.pk}")

        # Logged as completed whether or not a comment could be generated.
        add_media_to_completed_lists(media)
        sleep(SHORT)
|
||||
|
||||
def comment_on_hashtagged_media(comment_type: str = args.commenttype, amount=3, hashtag: str = None, like_positive: bool = False):
    """
    Generate and post comments on recent media found under a hashtag.

    Args:
        comment_type: Key into PROFILE_CONFIG['comments'] selecting the prompt,
            hashtags and sentiment.
        amount: Number of filtered media to request.
        hashtag: Hashtag to use; a random one from the comment type's list when empty.
        like_positive: When True and the comment type's sentiment is "positive",
            also like each media. Defaults to False, matching the previous
            behaviour where the like branch was hard-disabled by an
            always-false condition.
    """
    comment_config = PROFILE_CONFIG['comments'][comment_type]
    if not hashtag:
        hashtag = random.choice(comment_config['hashtags'])

    medias = get_medias_by_hashtag(hashtag=hashtag, days_ago_max=7, amount=amount)

    for media in medias:
        if check_media_in_completed_lists(media):
            DEBUG(f"Media already interacted with: {media.pk}")
            continue

        media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
        comment_text = None

        if media_path and os.path.exists(media_path):
            encoded_media = encode_image_to_base64(media_path)
            # A missing caption must not break the string concatenation.
            caption = media.caption_text or ""
            comment_prompt_usr = comment_config['img_comment_usr'] + " For reference, here is the description that was posted with this image: " + caption
            comment_text = llava(encoded_media, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)

        if like_positive and comment_config['sentiment'] == "positive":
            try:
                # media_like expects the media id, not the Media object.
                like_result = cl.media_like(media.id)
                if like_result:
                    DEBUG(f"Liked media: https://instagram.com/p/{media.pk}/")
            except instagrapi.exceptions.FeedbackRequired as e:
                DEBUG(f"Cannot like media {media.pk}: {str(e)}")

        if comment_text:
            try:
                cl.media_comment(media.pk, comment_text)
                DEBUG(f"Commented on media: https://instagram.com/p/{media.pk}/")
            except instagrapi.exceptions.FeedbackRequired as e:
                DEBUG(f"Cannot comment on media {media.pk}: {str(e)}")
        else:
            DEBUG(f"Failed to generate comment for media: https://instagram.com/p/{media.pk}")

        # Logged as completed whether or not a comment was posted.
        add_media_to_completed_lists(media)
        sleep(SHORT)
|
||||
|
||||
|
||||
def comment_on_specific_media(media_url, comment_type: str = "default"):
    """
    Generate and post a comment on the media at `media_url`.

    Resolves the URL to a media, downloads its thumbnail, asks the vision
    model for a comment (including the original caption for reference) and
    posts it. Stops with a log message if the thumbnail cannot be downloaded,
    instead of passing a missing path to the encoder.
    """
    media_id = cl.media_pk_from_url(media_url)
    sleep(SHORT)
    media = cl.media_info(media_id)
    sleep(SHORT)

    media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
    if not media_path:
        DEBUG(f"Failed to generate comment for specific media: https://instagram.com/p/{media.pk}/")
        return
    encoded_media = encode_image_to_base64(media_path)

    # A missing caption must not break the string concatenation.
    caption = media.caption_text or ""
    comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr'] + " For reference, here is the description that was posted with this image: " + caption
    comment_text = llava(encoded_media, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)

    if not comment_text:
        DEBUG(f"Failed to generate comment for specific media: https://instagram.com/p/{media.pk}/")
        return

    try:
        cl.media_comment(media.pk, comment_text)
        DEBUG(f"Commented on specific media: https://instagram.com/p/{media.pk}/")
    except instagrapi.exceptions.FeedbackRequired as e:
        DEBUG(f"Failed to comment on specific media: https://instagram.com/p/{media.pk}/ due to error: {str(e)}")
|
||||
|
||||
|
||||
|
||||
def get_image(status_data, key):
    """
    Read the first output image recorded for node `key` in a job's status data.

    Args:
        status_data: Job record containing an "outputs" mapping.
        key: Output node id whose "images" list should be read.

    Returns:
        The raw bytes of the image file under COMFYUI_OUTPUT_DIR.

    Raises:
        Exception: if no image is listed, the entry has no filename, or the
            file is missing on disk.
    """
    outputs = status_data.get("outputs", {})
    images_info = outputs.get(key, {}).get("images", [])
    if not images_info:
        raise Exception("No images found in the job output.")

    image_info = images_info[0]  # Assuming the first image is the target
    filename = image_info.get("filename")
    if not filename:
        # Fail here with a clear message rather than inside os.path.join.
        raise Exception("Failed to extract image information due to missing key: 'filename'")
    subfolder = image_info.get("subfolder") or ""  # Default to empty if not present
    file_path = os.path.join(COMFYUI_OUTPUT_DIR, subfolder, filename)

    try:
        with open(file_path, 'rb') as file:
            return file.read()
    except FileNotFoundError as e:
        raise Exception(f"File {filename} not found at the expected path {file_path}") from e
|
||||
|
||||
|
||||
def update_prompt(workflow: dict, post: dict, positive: str, found_key=None, path=None):
    """
    Fill the placeholders of a ComfyUI workflow in place.

    Walks the workflow recursively and:
      * replaces "API_PPrompt" with the post's positive prompt plus `positive`,
        and "API_SPrompt" / "API_NPrompt" with the post's values;
      * assigns a random value to every "seed" / "noise_seed" entry;
      * replaces width/height-like entries whose value is 1023 or 1025 with
        the post's "width" / "height" (default 1024);
      * remembers the key of a SaveImage node whose filename_prefix is "API_".

    Args:
        workflow: Workflow (or sub-node) to update.
        post: Post configuration supplying prompts and dimensions.
        positive: Text appended to the positive prompt.
        found_key: Internal one-element holder shared across the recursion.
        path: Internal list of keys leading to `workflow`, used in error logs.

    Returns:
        The key of the matching SaveImage node, or None if there is none.
    """
    # A fresh holder per top-level call: a shared mutable default would hand the
    # key found in one workflow to a later call on a different workflow.
    if found_key is None:
        found_key = [None]
    if path is None:
        path = []

    try:
        if isinstance(workflow, dict):
            for key, value in workflow.items():
                current_path = path + [key]

                if isinstance(value, dict):
                    if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
                        found_key[0] = key
                    update_prompt(value, post, positive, found_key, current_path)
                elif isinstance(value, list):
                    # Recursive call with updated path for each item in a list
                    for index, item in enumerate(value):
                        update_prompt(item, post, positive, found_key, current_path + [str(index)])

                if value == "API_PPrompt":
                    workflow[key] = post.get(value, "") + positive
                    DEBUG(f"Updated API_PPrompt to: {workflow[key]}")
                elif value == "API_SPrompt":
                    workflow[key] = post.get(value, "")
                    DEBUG(f"Updated API_SPrompt to: {workflow[key]}")
                elif value == "API_NPrompt":
                    workflow[key] = post.get(value, "")
                    DEBUG(f"Updated API_NPrompt to: {workflow[key]}")
                elif key == "seed" or key == "noise_seed":
                    workflow[key] = random.randint(1000000000000, 9999999999999)
                    DEBUG(f"Updated seed to: {workflow[key]}")
                elif (key == "width" or key == "max_width" or key == "scaled_width" or key == "side_length") and (value == 1023 or value == 1025):
                    workflow[key] = post.get("width", 1024)
                elif (key == "dimension" or key == "height" or key == "max_height" or key == "scaled_height") and (value == 1023 or value == 1025):
                    workflow[key] = post.get("height", 1024)
    except Exception as e:
        # str() each element so non-string keys cannot break the error report.
        DEBUG(f"Error in update_prompt at path {' -> '.join(map(str, path))}: {e}")
        raise

    return found_key[0]
|
||||
|
||||
def update_prompt_custom(workflow: dict, API_PPrompt: str, API_SPrompt: str, API_NPrompt: str, found_key=None, path=None):
    """
    Fill the placeholders of a ComfyUI workflow in place from explicit prompts.

    Walks the workflow recursively and:
      * replaces the "API_PPrompt" / "API_SPrompt" / "API_NPrompt" placeholders
        with the corresponding argument;
      * assigns a random value to every "seed" / "noise_seed" entry;
      * sets width/height-like entries whose value is 1023 or 1025 to 1024;
      * remembers the key of a SaveImage node whose filename_prefix is "API_".

    Args:
        workflow: Workflow (or sub-node) to update.
        API_PPrompt: Replacement for the positive-prompt placeholder.
        API_SPrompt: Replacement for the "API_SPrompt" placeholder.
        API_NPrompt: Replacement for the negative-prompt placeholder.
        found_key: Internal one-element holder shared across the recursion.
        path: Internal list of keys leading to `workflow`, used in error logs.

    Returns:
        The key of the matching SaveImage node, or None if there is none.
    """
    # A fresh holder per top-level call (a shared mutable default leaks state).
    if found_key is None:
        found_key = [None]
    if path is None:
        path = []

    try:
        if isinstance(workflow, dict):
            for key, value in workflow.items():
                current_path = path + [key]

                if isinstance(value, dict):
                    if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
                        found_key[0] = key
                    # Recurse into this function; update_prompt takes a different argument list.
                    update_prompt_custom(value, API_PPrompt, API_SPrompt, API_NPrompt, found_key, current_path)
                elif isinstance(value, list):
                    # Recursive call with updated path for each item in a list
                    for index, item in enumerate(value):
                        update_prompt_custom(item, API_PPrompt, API_SPrompt, API_NPrompt, found_key, current_path + [str(index)])

                if value == "API_PPrompt":
                    workflow[key] = API_PPrompt
                    DEBUG(f"Updated API_PPrompt to: {workflow[key]}")
                elif value == "API_SPrompt":
                    workflow[key] = API_SPrompt
                    DEBUG(f"Updated API_SPrompt to: {workflow[key]}")
                elif value == "API_NPrompt":
                    workflow[key] = API_NPrompt
                    DEBUG(f"Updated API_NPrompt to: {workflow[key]}")
                elif key == "seed" or key == "noise_seed":
                    workflow[key] = random.randint(1000000000000, 9999999999999)
                    DEBUG(f"Updated seed to: {workflow[key]}")
                elif (key == "width" or key == "max_width" or key == "scaled_width") and (value == 1023 or value == 1025):
                    workflow[key] = 1024
                elif (key == "dimension" or key == "height" or key == "max_height" or key == "scaled_height") and (value == 1023 or value == 1025):
                    workflow[key] = 1024
    except Exception as e:
        # str() each element so non-string keys cannot break the error report.
        DEBUG(f"Error in update_prompt_custom at path {' -> '.join(map(str, path))}: {e}")
        raise

    return found_key[0]
|
||||
|
||||
|
||||
##################################
|
||||
### IMAGE GENERATION FUNCTIONS ###
|
||||
##################################
|
||||
|
||||
|
||||
def image_gen(prompt: str, model: str):
    """
    Generate one 1024x1024 standard-quality image with the OpenAI client and download it.

    Returns whatever download_and_resize_image() returns for the generated image's URL.
    """
    request = {
        "model": model,
        "prompt": prompt,
        "size": "1024x1024",
        "quality": "standard",
        "n": 1,
    }
    response = IMG_GEN.images.generate(**request)
    return download_and_resize_image(response.data[0].url)
|
||||
|
||||
|
||||
def queue_prompt(prompt: dict):
    """
    Submit a workflow to the ComfyUI /prompt endpoint and return its prompt_id.

    Raises Exception when the server answers with a non-200 status.
    """
    payload = {"prompt": prompt, "client_id": CLIENT_ID}
    response = requests.post(f"{COMFYUI_URL}/prompt", json=payload)
    if response.status_code != 200:
        raise Exception(f"Failed to queue prompt. Status code: {response.status_code}, Response body: {response.text}")
    return response.json().get('prompt_id')
|
||||
|
||||
def poll_status(prompt_id):
    """
    Poll the job status until it's complete and return the status data.

    Requests the ComfyUI /history/<prompt_id> endpoint once per second until
    the job's "status.completed" flag is true, then returns that job's record.
    This function was previously defined twice with identical bodies; a single
    definition is kept.

    Raises:
        Exception: if a status request answers with a non-200 status.
    """
    start_time = time.time()  # Record the start time
    while True:
        elapsed_time = int(time.time() - start_time)  # Calculate elapsed time in seconds
        status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}")
        # Use \r to return to the start of the line, and end='' to prevent newline
        DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='')
        if status_response.status_code != 200:
            raise Exception("Failed to get job status")
        status_data = status_response.json()
        job_data = status_data.get(prompt_id, {})
        if job_data.get("status", {}).get("completed", False):
            DEBUG()
            DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.")
            return job_data
        time.sleep(1)
|
||||
|
||||
################################
|
||||
### PRIMARY ACTIVE FUNCTIONS ###
|
||||
################################
|
||||
|
||||
def load_post(chosen_post: str = "default"):
    """
    Return the profile's configuration for `chosen_post`.

    When the profile has no such post type, one is drawn with choose_post()
    and its configuration is returned instead.
    """
    posts = PROFILE_CONFIG['posts']
    if chosen_post in posts:
        selected = posts[chosen_post]
        DEBUG(f"Loaded post for {chosen_post}")
        return selected

    DEBUG(f"Unable to load post for {chosen_post}. Choosing a default post.")
    chosen_post = choose_post(posts)
    selected = posts[chosen_post]
    DEBUG(f"Defaulted to {chosen_post}")
    return selected
|
||||
|
||||
def handle_image_workflow(chosen_post=None):
    """
    Orchestrates the workflow from prompt update, image generation, to either saving the image and description locally
    or posting to Instagram based on the local flag.

    Steps: pick a post type (randomly when none is given), ask the LLM for an
    image concept, generate the image with DALL-E or with the ComfyUI
    workflow, then hand the resulting file to image_aftergen().

    With args.fast, the "<workflow>_fast" variant is tried first and the
    standard workflow is used when that file does not exist.
    """
    if chosen_post is None:
        chosen_post = choose_post(PROFILE_CONFIG['posts'])

    post = load_post(chosen_post)

    workflow_name = args.workflow if args.workflow else random.choice(post['workflows'])
    DEBUG(f"Workflow name: {workflow_name}")

    DEBUG(f"Generating image concept for {chosen_post} and {workflow_name} now.")
    image_concept = query_ollama(llmPrompt = post['llmPrompt'], max_tokens = 180) if args.local or not args.openai else query_gpt4(llmPrompt = post['llmPrompt'], max_tokens = 180)
    DEBUG(f"Image concept for {chosen_post}: {image_concept}")

    if args.dalle and not args.local:
        jpg_file_path = image_gen(image_concept, "dall-e-3")
    else:
        workflow_data = None
        if args.fast:
            # load_json raises (rather than returning None) for a missing file,
            # so the fallback below is only reachable if we catch that here.
            try:
                workflow_data = load_json(None, f"{workflow_name}_fast")
            except FileNotFoundError:
                DEBUG(f"No fast variant found for workflow {workflow_name}; using the standard workflow.")
        if workflow_data is None:
            workflow_data = load_json(None, workflow_name)

        saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
        DEBUG(f"Saved file key: {saved_file_key}")
        prompt_id = queue_prompt(workflow_data)
        DEBUG(f"Prompt ID: {prompt_id}")
        status_data = poll_status(prompt_id)
        image_data = get_image(status_data, saved_file_key)
        if chosen_post == "landscape":
            jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 2880, 100)
        else:
            jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 1440, 90)

    # Both generation paths report failure by returning None.
    if not jpg_file_path:
        DEBUG(f"Image generation produced no file for {chosen_post}; skipping post-processing.")
        return

    # Pass the loaded post along so a fallback chosen by load_post() is honoured.
    image_aftergen(jpg_file_path, chosen_post, post=post, prompt=post.get('Vision_Prompt'))
|
||||
|
||||
def handle_custom_image(custom_post: str):
    """
    Orchestrates the workflow from prompt update, image generation, to either saving the image and description locally
    or posting to Instagram based on the local flag.

    `custom_post` is the user's free-text request. It is turned into an image
    concept by the LLM, rendered with DALL-E or the ComfyUI workflow, and then
    handed to image_aftergen() together with the post settings and a
    description prompt that mentions the concept.

    Fixes over the previous version: the category name is resolved before the
    generation branch (it was unbound on the DALL-E path), the description is
    no longer generated here only to be discarded, and the post settings and
    prompt are passed to image_aftergen() so a "custom" post does not require
    a profile entry.
    """
    if args.posttype:
        post = load_post(args.posttype)
        workflow_name = args.workflow if args.workflow else random.choice(post['workflows'])
    else:
        workflow_name = args.workflow if args.workflow else "selfie"
        post = {
            "API_PPrompt": "",
            "API_SPrompt": "; (((masterpiece))); (beautiful lighting:1), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
            "API_NPrompt": "canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3",
            "Vision_Prompt": "Write an upbeat Instagram description with emojis to accompany this selfie!",
            "frequency": 2,
            "ghost_tags": [
                "aigenerated",
                "stablediffusion",
                "sdxl",
            ],
        }

    # Needed by both generation paths and by image_aftergen().
    chosen_post = args.posttype if args.posttype else "custom"

    system_msg = "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words."
    image_concept = query_ollama(system_msg=system_msg, user_msg=custom_post, max_tokens = 180) if args.local or not args.openai else query_gpt4(system_msg=system_msg, user_msg=custom_post, max_tokens = 180)
    DEBUG(f"Image concept: {image_concept}")

    if args.dalle and not args.local:
        jpg_file_path = image_gen(image_concept, "dall-e-3")
    else:
        workflow_data = load_json(None, workflow_name)
        saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
        DEBUG(f"Saved file key: {saved_file_key}")

        prompt_id = queue_prompt(workflow_data)
        DEBUG(f"Prompt ID: {prompt_id}")

        status_data = poll_status(prompt_id)
        image_data = get_image(status_data, saved_file_key)
        jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 1440, 90)

    # Both generation paths report failure by returning None.
    if not jpg_file_path:
        DEBUG("Image generation produced no file; skipping post-processing.")
        return

    vision_prompt = f"Write upbeat Instagram description accompany this image, which was created by AI using the following prompt: {image_concept}"
    image_aftergen(jpg_file_path, chosen_post, post=post, prompt=vision_prompt)
|
||||
|
||||
|
||||
def image_aftergen(jpg_file_path: str, chosen_post: str = None, post: Dict = None, prompt: str = None):
    """
    Describe a generated image and publish or save the results.

    Asks the vision model for an Instagram description and a short title,
    writes a .txt description and a .md summary next to the image, optionally
    sets the desktop wallpaper, and - unless args.local is set - uploads to
    Instagram and/or Ghost depending on args.noig / args.noghost.

    Args:
        jpg_file_path: Path of the generated image.
        chosen_post: Post type name; used to look up the vision prompt and the
            Ghost tags in the profile when `prompt` / `post` are not supplied.
        post: Post settings; when given, its 'ghost_tags' are used.
        prompt: Vision prompt overriding the profile's 'Vision_Prompt'.
    """
    if chosen_post and not prompt:
        prompt = PROFILE_CONFIG['posts'][chosen_post]['Vision_Prompt']
    encoded_string = encode_image_to_base64(jpg_file_path)
    DEBUG(f"Image successfully encoded from {jpg_file_path}")
    instagram_description = llava(encoded_string, prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, prompt, 150)
    if not instagram_description:
        # Without a description there is nothing meaningful to write or post.
        DEBUG(f"Failed to generate a description for {jpg_file_path}; skipping post-processing.")
        return
    # Strip one pair of wrapping quotes, if the model added them.
    instagram_description = re.sub(r'^["\'](.*)["\']$', r'\1', instagram_description)

    ghost_tags = post['ghost_tags'] if post else PROFILE_CONFIG['posts'][chosen_post]['ghost_tags']

    title_prompt = f"Generate a short 3-5 word title for this image, which already includes the following description: {instagram_description}"

    # Generate img_title based on the condition provided
    img_title = llava(encoded_string, title_prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, title_prompt, 150)
    img_title = re.sub(r'^["\'](.*)["\']$', r'\1', img_title or "")

    # Save description to file and upload or save locally.
    # UTF-8 is set explicitly because descriptions are expected to contain emojis.
    description_filename = jpg_file_path.rsplit('.', 1)[0] + ".txt"
    description_path = os.path.join(IG_IMAGES_DIR, description_filename)
    with open(description_path, "w", encoding="utf-8") as desc_file:
        desc_file.write(instagram_description)

    # Initial markdown content creation
    markdown_filename = jpg_file_path.rsplit('.', 1)[0] + ".md"
    markdown_content = f"""# {img_title}

![{img_title}]({jpg_file_path})
---
{instagram_description}
---
Tags: {', '.join(ghost_tags)}
"""
    with open(markdown_filename, "w", encoding="utf-8") as md_file:
        md_file.write(markdown_content)

    DEBUG(f"Markdown file created at {markdown_filename}")

    if args.wallpaper:
        change_wallpaper(jpg_file_path)
        DEBUG(f"Wallpaper changed.")

    if not args.local:
        ig_footer = ""
        if not args.noig:
            post_url = upload_photo(jpg_file_path, instagram_description)
            DEBUG(f"Image posted at {post_url}")
            ig_footer = f"\n<a href=\"{post_url}\">Instagram link</a>"

        if not args.noghost:
            ghost_text = f"{instagram_description}"
            ghost_url = post_to_ghost(img_title, jpg_file_path, ghost_text, ghost_tags)
            DEBUG(f"Ghost post: {ghost_url}\n{ig_footer}")
|
||||
|
||||
|
||||
def choose_post(posts):
    """
    Pick a post type at random, weighted by each entry's 'frequency'.

    Draws an integer between 1 and the total frequency and returns the first
    post type whose cumulative frequency reaches the draw.
    """
    total_frequency = 0
    for info in posts.values():
        total_frequency += info['frequency']

    ticket = random.randint(1, total_frequency)
    running_total = 0
    for name in posts:
        running_total += posts[name]['frequency']
        if ticket <= running_total:
            return name
|
||||
|
||||
def load_json(json_payload, workflow):
    """
    Return parsed JSON from an inline payload or from a workflow file.

    A non-empty `json_payload` is parsed and returned. Otherwise `workflow`
    names a file under SD_WORKFLOWS_DIR (".json" is appended when missing).
    Raises ValueError when neither argument is provided.
    """
    if json_payload:
        return json.loads(json_payload)
    if not workflow:
        raise ValueError("No valid input provided.")

    filename = workflow if workflow.endswith('.json') else f"{workflow}.json"
    with open(os.path.join(SD_WORKFLOWS_DIR, filename), 'r') as handle:
        return json.load(handle)
|
||||
|
||||
|
||||
|
||||
|
||||
def save_as_jpg(image_data, prompt_id, chosen_post:str = None, max_size=2160, quality=80):
    """
    Write raw image bytes to disk and convert them to a (possibly downscaled) JPEG.

    The bytes are first stored as "<prompt_id>.png" inside the post type's
    folder under IG_IMAGES_DIR ("custom" when no post type is given), then
    re-opened, shrunk so the longest side is at most `max_size`, saved as
    "<prompt_id>.jpg" with the given quality, and the PNG is removed.

    Returns the JPEG path, or None when any step fails.
    """
    if not chosen_post:
        chosen_post = "custom"
    category_dir = os.path.join(IG_IMAGES_DIR, chosen_post)
    image_path_png = os.path.join(category_dir, f"{prompt_id}.png")

    try:
        os.makedirs(category_dir, exist_ok=True)

        with open(image_path_png, 'wb') as raw_file:
            raw_file.write(image_data)

        with Image.open(image_path_png) as img:
            longest_side = max(img.size)
            if longest_side > max_size:
                # Scale both sides by the same factor to keep the aspect ratio.
                ratio = max_size / max(img.size)
                new_size = tuple(int(side * ratio) for side in img.size)
                img = img.resize(new_size, Image.Resampling.LANCZOS)

            new_file_path = os.path.join(category_dir, f"{prompt_id}.jpg")
            img.convert('RGB').save(new_file_path, format='JPEG', quality=quality)

            # The intermediate PNG is no longer needed.
            os.remove(image_path_png)

        return new_file_path
    except Exception as e:
        DEBUG(f"Error processing image: {e}")
        return None
|
||||
|
||||
|
||||
def upload_photo(path, caption, title: str=None):
    """
    Upload a photo with its caption through the Instagram client and return the post URL.

    `title` is accepted but not used.
    """
    DEBUG(f"Uploading photo from {path}...")
    uploaded = cl.photo_upload(path, caption)
    return f"https://www.instagram.com/p/{uploaded.code}/"
|
||||
|
||||
def format_duration(seconds):
    """
    Format a duration in seconds as a short human-readable string.

    Under 120 s the value is shown in whole seconds, under 6400 s in whole
    minutes, and otherwise in hours with two decimals.
    """
    if seconds < 120:
        label = f"{int(seconds)} sec"
    elif seconds < 6400:
        label = f"{int(seconds // 60)} min"
    else:
        label = f"{seconds / 3600:.2f} hr"
    return label
|
||||
|
||||
########################
|
||||
### HELPER FUNCTIONS ###
|
||||
########################
|
||||
|
||||
import subprocess
|
||||
|
||||
def change_wallpaper(image_path):
    """
    Set the desktop picture to `image_path` by running an AppleScript via osascript.

    The command is passed as an argument list (no shell), and backslashes and
    double quotes in the path are escaped for the AppleScript string literal,
    so unusual file names cannot break or alter the command.
    """
    escaped_path = str(image_path).replace("\\", "\\\\").replace('"', '\\"')
    script = f'tell application "Finder" to set desktop picture to POSIX file "{escaped_path}"'
    subprocess.run(["osascript", "-e", script])
|
||||
|
||||
|
||||
def sleep(seconds):
    """Pause for a random duration between 0.66x and 1.5x the given number of seconds."""
    shortest = seconds*0.66
    longest = seconds*1.5
    sleepupto(shortest, longest)
|
||||
|
||||
def sleepupto(min_seconds, max_seconds=None):
    """
    Sleep for a random interval while showing a progress bar.

    With both arguments the interval is drawn uniformly between them; with
    only one it is drawn uniformly between 0 and that value. The wait is
    performed in steps of at most one second.
    """
    if max_seconds is not None:
        low, high = min_seconds, max_seconds
    else:
        low, high = 0, min_seconds
    interval = random.uniform(low, high)
    start_time = time.time()
    end_time = start_time + interval

    with tqdm(total=interval, desc=f"Sleeping for {format_duration(interval)}", unit=" sec", ncols=75, bar_format='{desc}: {bar} {remaining}') as pbar:
        while True:
            now = time.time()
            elapsed = now - start_time
            remaining = end_time - now
            if elapsed >= interval:
                break
            step = min(1, interval - elapsed)  # Never sleep past the end of the interval
            time.sleep(step)
            pbar.update(step)
            # Update remaining time display
            pbar.set_postfix_str(f"{format_duration(remaining)} remaining")
|
||||
|
||||
|
||||
########################
|
||||
### GHOST FUNCTIONS ###
|
||||
########################
|
||||
|
||||
|
||||
|
||||
def generate_jwt_token():
    """Create a five-minute HS256 JWT from ``GHOST_API_KEY``.

    ``GHOST_API_KEY`` is split on ``:`` into a key id, which goes into the
    ``kid`` header, and a hex-encoded secret, which is used as the signing key.

    Returns:
        The encoded token as a ``str``.
    """
    # NOTE(review): `date` is defined outside this block; it is used here as
    # if it were `datetime` -- confirm the alias.
    key_id, key_secret = GHOST_API_KEY.split(':')
    issued_at = int(date.now().timestamp())
    claims = {
        'iat': issued_at,
        'exp': issued_at + 5 * 60,  # valid for five minutes
        'aud': '/admin/',
    }
    signing_key = bytes.fromhex(key_secret)
    token = jwt.encode(claims, signing_key, algorithm='HS256', headers={'kid': key_id})
    # Some jwt versions return bytes; normalise to str.
    if isinstance(token, bytes):
        return token.decode('utf-8')
    return token
|
||||
|
||||
|
||||
def post_to_ghost(title, image_path, html_content, ghost_tags):
    """Upload an image to Ghost and publish a post that embeds it.

    Args:
        title: Title of the new post.
        image_path: Local path of the image to upload.
        html_content: HTML body placed below the uploaded image.
        ghost_tags: Value sent as the post's ``tags`` field.

    Returns:
        The URL of the published post, as reported by the API.

    Raises:
        requests.HTTPError: If the image upload or post creation fails.
    """
    import mimetypes

    jwt_token = generate_jwt_token()
    ghost_headers = {'Authorization': f'Ghost {jwt_token}'}

    # Derive the media type from the file name; the previously hard-coded
    # 'image/jpg' is not a registered type and was wrong for PNG/GIF/WebP.
    content_type = mimetypes.guess_type(str(image_path))[0] or 'image/jpeg'

    # Upload the image to Ghost
    with open(image_path, 'rb') as f:
        files = {'file': (os.path.basename(image_path), f, content_type)}
        image_response = requests.post(f"{GHOST_API_URL}/images/upload/", headers=ghost_headers, files=files)
    image_response.raise_for_status()  # Ensure the request was successful
    image_url = image_response.json()['images'][0]['url']

    # Prepare the post content: a single mobiledoc HTML card
    updated_html_content = f'<img src="{image_url}" alt="Image"/><hr/> {html_content}'
    mobiledoc = json.dumps({
        "version": "0.3.1",
        "atoms": [],
        "cards": [["html", {"cardName": "html", "html": updated_html_content}]],
        "markups": [],
        "sections": [[10, 0]],
    })

    post_data = {
        'posts': [{
            'title': title,
            'mobiledoc': mobiledoc,
            'status': 'published',
            'tags': ghost_tags,
        }]
    }

    # Create a new post
    post_response = requests.post(f"{GHOST_API_URL}/posts/", json=post_data, headers=ghost_headers)
    post_response.raise_for_status()
    return post_response.json()['posts'][0]['url']
|
||||
|
||||
|
||||
|
||||
########################################################
|
||||
@ig.post("/ig/flow")
async def ig_flow_endpoint(new_session: bool = False):
    """Restore a saved Instagram session or log in and save a new one.

    With ``new_session`` false and a session file at ``IG_SESSION_PATH``, the
    saved settings are loaded. With ``new_session`` true, a login is attempted
    with the current TOTP code and the settings are saved on success. Every
    other case raises.

    Raises:
        Exception: If no session was loaded and no login succeeded.
    """
    # NOTE(review): sleepupto() blocks with time.sleep inside this coroutine.
    # NOTE(review): with new_session=False and no saved session file this
    # raises without attempting a login -- confirm that is intended.
    now_unix = int(date.now().timestamp())
    seconds_left = 30 - ((now_unix - rollover_time) % 30)

    if seconds_left < 4:
        DEBUG("Too close to end of TOTP counter. Waiting.")
        sleepupto(5, 5)

    if not new_session and os.path.exists(IG_SESSION_PATH):
        cl.load_settings(IG_SESSION_PATH)
        DEBUG("Loaded past session.")
        return None

    if not (new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now())):
        raise Exception(f"Failed to login as {IG_USERNAME}.")

    cl.dump_settings(IG_SESSION_PATH)
    DEBUG("Logged in and saved new session.")
|
484
sijapi/routers/llm.py
Normal file
484
sijapi/routers/llm.py
Normal file
|
@ -0,0 +1,484 @@
|
|||
#routers/llm.py
|
||||
from fastapi import APIRouter, HTTPException, Request, Response
|
||||
from fastapi.responses import StreamingResponse, JSONResponse
|
||||
from starlette.responses import StreamingResponse
|
||||
from datetime import datetime as dt_datetime
|
||||
from dateutil import parser
|
||||
from typing import List, Dict, Any, Union
|
||||
from pydantic import BaseModel, root_validator, ValidationError
|
||||
import aiofiles
|
||||
import os
|
||||
import glob
|
||||
import chromadb
|
||||
from openai import OpenAI
|
||||
import uuid
|
||||
import json
|
||||
import base64
|
||||
from pathlib import Path
|
||||
import ollama
|
||||
from ollama import AsyncClient as Ollama, list as OllamaList
|
||||
import aiofiles
|
||||
import time
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from fastapi import FastAPI, Request, HTTPException, APIRouter
|
||||
from fastapi.responses import JSONResponse, StreamingResponse
|
||||
from dotenv import load_dotenv
|
||||
from sijapi import BASE_DIR, DATA_DIR, LOGS_DIR, CONFIG_DIR, LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi.utilities import convert_to_unix_time, sanitize_filename
|
||||
|
||||
# Router that collects the LLM endpoints defined in this module.
llm = APIRouter()

# Initialize chromadb client
client = chromadb.Client()
# get_or_create_collection is used instead of create_collection, which
# raises when a collection named "obsidian" already exists on the client.
# NOTE(review): this rebinds the OBSIDIAN_CHROMADB_COLLECTION name imported
# from sijapi above -- confirm the shadowing is intended.
OBSIDIAN_CHROMADB_COLLECTION = client.get_or_create_collection("obsidian")
|
||||
|
||||
# Function to read all markdown files in the folder
|
||||
def read_markdown_files(folder: Path):
    """Read every ``*.md`` file located directly inside ``folder``.

    Returns:
        A tuple ``(documents, file_paths)`` where ``documents[i]`` is the
        UTF-8 text of ``file_paths[i]``. Paths are the strings produced by
        ``glob.glob``.
    """
    def _read_text(path):
        with open(path, 'r', encoding='utf-8') as handle:
            return handle.read()

    md_paths = glob.glob(os.path.join(folder, "*.md"))
    return [_read_text(md_path) for md_path in md_paths], md_paths
|
||||
|
||||
# Read markdown files and generate embeddings
|
||||
# Runs at import time: embeds each markdown file from DOC_DIR and stores
# it in the collection, keyed by its file path.
documents, file_paths = read_markdown_files(DOC_DIR)
for doc_path, doc in zip(file_paths, documents):
    embedding = ollama.embeddings(model="mxbai-embed-large", prompt=doc)["embedding"]
    OBSIDIAN_CHROMADB_COLLECTION.add(
        ids=[doc_path],
        embeddings=[embedding],
        documents=[doc],
    )
|
||||
|
||||
# Function to retrieve the most relevant document given a prompt
|
||||
@llm.get("/retrieve_document/{prompt}")
async def retrieve_document(prompt: str):
    """Return the stored document whose embedding is closest to ``prompt``.

    Args:
        prompt: Free text to embed and match against the collection.

    Returns:
        ``{"document": <text of the best match>}``.

    Raises:
        HTTPException: 404 when the query yields no document (for example
            when nothing has been indexed), instead of the IndexError that
            previously surfaced as a 500.
    """
    response = ollama.embeddings(
        prompt=prompt,
        model="mxbai-embed-large"
    )
    results = OBSIDIAN_CHROMADB_COLLECTION.query(
        query_embeddings=[response["embedding"]],
        n_results=1
    )
    # One inner list per query embedding; it is empty when nothing matched.
    matches = results['documents']
    if not matches or not matches[0]:
        raise HTTPException(status_code=404, detail="No matching document found.")
    return {"document": matches[0][0]}
|
||||
|
||||
# Function to generate a response using RAG
|
||||
@llm.get("/generate_response/{prompt}")
async def generate_response(prompt: str):
    """Answer ``prompt`` with llama2, grounding it in the best-matching document.

    Returns:
        ``{"response": <generated text>}``.
    """
    retrieved = await retrieve_document(prompt)
    rag_prompt = f"Using this data: {retrieved['document']}. Respond to this prompt: {prompt}"
    completion = ollama.generate(model="llama2", prompt=rag_prompt)
    return {"response": completion['response']}
|
||||
|
||||
|
||||
async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, max_tokens: int = 200):
    """Send a system/user message pair to ``DEFAULT_LLM`` and return the reply.

    Args:
        usr: Content of the user message.
        sys: Content of the system message.
        max_tokens: Passed to Ollama as the ``num_predict`` option.

    Returns:
        The reply text, or ``None`` when the response has no message content.
    """
    conversation = [
        {"role": "system", "content": sys},
        {"role": "user", "content": usr},
    ]
    response = await Ollama().chat(
        model=DEFAULT_LLM,
        messages=conversation,
        options={"num_predict": max_tokens},
    )

    DEBUG(response)
    if "message" not in response:
        DEBUG("No choices found in response")
        return None

    reply = response["message"]
    if "content" in reply:
        return reply["content"]
    return None
|
||||
|
||||
def is_vision_request(content):
    """Return True when ``content`` is a list holding an ``image_url`` part.

    This early definition used to be a stub that always returned False. It
    now matches the later definition of the same name in this module, so
    the result does not depend on which definition is in effect.
    """
    if isinstance(content, list):
        return any(
            isinstance(part, dict) and part.get('type') == 'image_url'
            for part in content
        )
    return False
|
||||
|
||||
@llm.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Chat-completions endpoint backed by Ollama.

    The request body is first archived as JSON under ``REQUESTS_DIR``. If the
    most recent message is a user message containing an ``image_url`` part,
    the reply is streamed from the ``llava`` model. Otherwise the requested
    model is resolved through ``get_appropriate_model`` and the reply is
    streamed or returned whole, depending on the ``stream`` flag.

    Raises:
        HTTPException: 400 when the body contains no ``messages``.
    """
    body = await request.json()

    # Archive the raw request for later inspection.
    timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S%f")
    filename = REQUESTS_DIR / f"request_{timestamp}.json"
    async with aiofiles.open(filename, mode='w') as file:
        await file.write(json.dumps(body, indent=4))

    messages = body.get('messages')
    if not messages:
        raise HTTPException(status_code=400, detail="Message data is required in the request body.")

    requested_model = body.get('model', 'default-model')
    DEBUG(f"Requested model: {requested_model}")
    stream = body.get('stream')
    token_limit = body.get('max_tokens') or body.get('num_predict')
    # Forward a limit only when the client supplied one; passing None would
    # override the streamers' own num_predict default.
    limit_kwargs = {"num_predict": token_limit} if token_limit else {}

    # Check if the most recent message contains an image_url
    recent_message = messages[-1]
    if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
        DEBUG("Processing as a vision request")
        model = "llava"
        DEBUG(f"Using model: {model}")
        return StreamingResponse(
            stream_messages_with_vision(recent_message, model, **limit_kwargs),
            media_type="application/json",
        )

    DEBUG("Processing as a standard request")
    # Substitute a usable model for unknown names (including the
    # 'default-model' placeholder). If the lookup itself fails, keep the
    # requested name so the error is reported by the generation call.
    try:
        model = get_appropriate_model(requested_model)
    except Exception as e:
        WARN(f"Could not resolve model {requested_model}: {e}")
        model = requested_model
    DEBUG(f"Using model: {model}")

    if stream:
        return StreamingResponse(
            stream_messages(messages, model, **limit_kwargs),
            media_type="application/json",
        )
    response_data = await generate_messages(messages, model)
    return JSONResponse(response_data, media_type="application/json")
|
||||
|
||||
async def stream_messages(messages: list, model: str = "llama3", num_predict: int = 300):
    """Stream an Ollama chat reply as ``data:``-prefixed JSON chunks.

    Args:
        messages: Chat history passed to Ollama.
        model: Name of the Ollama model to use.
        num_predict: Token limit passed in the Ollama options; ``None``
            falls back to 300.

    Yields:
        One ``data: {...}`` line per streamed part, shaped like a
        ``chat.completion.chunk``; an ``error`` payload if streaming fails;
        and a final ``data: [DONE]`` line.
    """
    if num_predict is None:
        num_predict = 300

    async with Ollama() as async_client:
        try:
            index = 0
            # AsyncClient.chat is a coroutine that resolves to the async
            # iterator, so it must be awaited before iterating.
            part_stream = await async_client.chat(
                model=model,
                messages=messages,
                stream=True,
                options={'num_predict': num_predict},
            )
            async for part in part_stream:
                yield "data: " + json.dumps({
                    "id": "chatcmpl-123",
                    "object": "chat.completion.chunk",
                    "created": int(time.time()),
                    "model": model,
                    "system_fingerprint": "fp_44709d6fcb",
                    "choices": [{
                        "index": index,
                        "delta": {"role": "assistant", "content": part['message']['content']},
                        "logprobs": None,
                        "finish_reason": None if 'finish_reason' not in part else part['finish_reason']
                    }]
                }) + "\n\n"
                index += 1
        except Exception as e:
            yield "data: " + json.dumps({"error": f"Error: {str(e)}"}) + "\n\n"
        yield "data: [DONE]\n\n"
|
||||
|
||||
|
||||
|
||||
async def stream_messages_with_vision(message: dict, model: str, num_predict: int = 300):
    """Stream image descriptions for the inline images of one chat message.

    For every ``image_url`` part of ``message['content']`` whose URL is a
    ``data:image`` URL, the base64 payload is decoded and sent to Ollama with
    the prompt ``'explain this image:'``. Each streamed part is yielded as a
    ``data:``-prefixed chunk; a failure yields an ``error`` payload; a final
    ``data: [DONE]`` line is always yielded.
    """
    async with Ollama() as async_client:
        try:
            parts = message['content'] if isinstance(message.get('content'), list) else []
            for part in parts:
                if part['type'] != 'image_url' or 'url' not in part['image_url']:
                    continue
                image_url = part['image_url']['url']
                if not image_url.startswith('data:image'):
                    continue

                # Convert base64 to bytes
                encoded_payload = image_url.split('base64,')[1]
                image_data = base64.b64decode(encoded_payload)
                response_generator = await async_client.generate(
                    model=model,
                    prompt='explain this image:',
                    images=[image_data],
                    stream=True,
                    options={'num_predict': num_predict},
                )
                index = 0
                async for response in response_generator:
                    finish_reason = response['finish_reason'] if 'finish_reason' in response else None
                    chunk = {
                        "id": "chatcmpl-123",
                        "object": "chat.completion.chunk",
                        "created": int(time.time()),
                        "model": model,
                        "system_fingerprint": "fp_44709d6fcb",
                        "choices": [{
                            "index": index,
                            "delta": {"role": "assistant", "content": response['response']},
                            "logprobs": None,
                            "finish_reason": finish_reason,
                        }],
                    }
                    yield "data: " + json.dumps(chunk) + "\n\n"
                    index += 1
        except Exception as e:
            yield "data: " + json.dumps({"error": f"Error: {str(e)}"}) + "\n\n"
        yield "data: [DONE]\n\n"
|
||||
|
||||
|
||||
def get_appropriate_model(requested_model):
    """Map a requested model name to the model that will actually be used.

    ``"gpt-4-vision-preview"`` maps to ``DEFAULT_VISION``. A name that
    ``is_model_available`` rejects maps to ``DEFAULT_LLM``. Any other name
    is returned unchanged.
    """
    if requested_model == "gpt-4-vision-preview":
        return DEFAULT_VISION
    if is_model_available(requested_model):
        return requested_model
    return DEFAULT_LLM
|
||||
|
||||
def is_vision_request(content):
    """Return True when ``content`` is a list with an ``image_url`` dict part."""
    if not isinstance(content, list):
        return False
    for entry in content:
        if isinstance(entry, dict) and entry.get('type') == 'image_url':
            return True
    return False
|
||||
|
||||
|
||||
@llm.get("/v1/models")
async def get_models():
    """List the models reported by Ollama as a model-list JSON response.

    Each id is the Ollama model name with everything from the first ``:``
    (the tag) removed; ``created`` is derived from the ``modified_at`` field.
    """
    formatted_models = [
        {
            "id": entry['name'].split(':')[0],
            "object": "model",
            "created": convert_to_unix_time(entry['modified_at']),
            "owned_by": "sij",
        }
        for entry in OllamaList()['models']
    ]
    return JSONResponse({"object": "list", "data": formatted_models})
|
||||
|
||||
async def generate_messages(messages: list, model: str = "llama3"):
    """Run one non-streaming Ollama chat and wrap the reply in a choices dict.

    Returns:
        ``{"model": ..., "choices": [{"message": {...}}]}`` on success, or
        ``{"error": "Error: ..."}`` when the chat call raises.
    """
    ollama_client = Ollama()
    try:
        reply = await ollama_client.chat(model=model, messages=messages, stream=False)
        assistant_message = {
            "role": "assistant",
            "content": reply['message']['content'],
        }
    except Exception as e:
        return {"error": f"Error: {str(e)}"}
    return {"model": model, "choices": [{"message": assistant_message}]}
|
||||
|
||||
|
||||
|
||||
def is_model_available(model_name):
    """Report whether Ollama lists a model that matches ``model_name``.

    A listed model matches when its name equals ``model_name`` or starts
    with ``model_name`` followed by ``:``. An ambiguous match (several
    candidates) still counts as available, but logs a warning.
    """
    available_models = [entry['name'] for entry in OllamaList()['models']]
    DEBUG(f"Available models: {available_models}")  # Log using the configured LOGGER

    matching_models = [
        name for name in available_models
        if name.startswith(model_name + ':') or name == model_name
    ]
    if not matching_models:
        WARN(f"No match found for model: {model_name}")
        return False

    if len(matching_models) == 1:
        DEBUG(f"Unique match found: {matching_models[0]}")
    else:
        WARN(f"Ambiguous match found, models: {matching_models}")
    return True
|
||||
|
||||
|
||||
@llm.options("/chat/completions")
@llm.options("/v1/chat/completions")
async def chat_completions_options(request: Request):
    """Answer OPTIONS with usage instructions for the chat completions endpoint."""
    usage_text = "To use the chat completions endpoint, make a POST request to /v1/chat/completions with a JSON payload containing the 'messages' array. Each message should have a 'role' (either 'system', 'user', or 'assistant') and 'content' (the message text). You can optionally specify the 'model' to use. The response will be a JSON object containing the generated completions."
    help_choice = {
        "index": 0,
        "message": {"role": "assistant", "content": usage_text},
        "finish_reason": "stop",
    }
    payload = {
        "choices": [help_choice],
        "created": int(time.time()),
        "id": str(uuid.uuid4()),
        "model": DEFAULT_LLM,
        "object": "chat.completion.chunk",
    }
    response_headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Allow": "OPTIONS, POST",
    }
    return JSONResponse(content=payload, status_code=200, headers=response_headers)
|
||||
|
||||
#### EMBEDDINGS
|
||||
|
||||
class EmbeddingRequest(BaseModel):
    """Request body for the embeddings endpoints.

    The text may arrive as ``input`` or as ``prompt``. The pre-validator
    wraps single strings in a list and, when only ``input`` is given, moves
    it to ``prompt``.
    """
    model: str
    input: Union[str, List[str], None] = None
    prompt: Union[str, List[str], None] = None

    @root_validator(pre=True)
    def ensure_list(cls, values):
        """Normalise ``input``/``prompt`` to lists and prefer ``prompt``."""
        input_value = values.get('input')
        prompt_value = values.get('prompt')

        # Wrap a bare, non-empty string in a one-element list.
        for field_name, raw_value in (('input', input_value), ('prompt', prompt_value)):
            if raw_value and isinstance(raw_value, str):
                values[field_name] = [raw_value]

        # Only `input` supplied: move it into `prompt`.
        if input_value and not prompt_value:
            values['prompt'] = values['input']
            values['input'] = None

        return values
|
||||
|
||||
class EmbeddingResponse(BaseModel):
    """Response body returned by the embeddings endpoints."""
    # Kind of payload; create_embedding sets this to "list".
    object: str
    # One dict per embedding (object / index / embedding).
    data: List[Dict[str, Any]]
    # Name of the model that produced the embeddings.
    model: str
    # Token accounting, e.g. prompt_tokens and total_tokens.
    usage: Dict[str, int]
|
||||
|
||||
@llm.post("/api/embeddings", response_model=EmbeddingResponse)
@llm.post("/v1/embeddings", response_model=EmbeddingResponse)
async def create_embedding(request: EmbeddingRequest):
    """Embed the request text with Ollama and return it as a one-item list.

    All prompt strings are joined with single spaces and embedded as one
    text.

    Raises:
        HTTPException: 422 when neither ``input`` nor ``prompt`` carries
            text (this used to surface as a 500 from ``" ".join(None)``);
            500 for any failure while computing the embedding.
    """
    # Validate outside the try block so the 422 is not rewritten to a 500
    # by the generic handler below.
    if not request.prompt:
        raise HTTPException(status_code=422, detail="Either 'input' or 'prompt' must be provided.")

    try:
        prompts = [request.prompt] if isinstance(request.prompt, str) else request.prompt
        combined_input = " ".join(prompts)
        response = ollama.embeddings(model=request.model, prompt=combined_input)
        embedding_list = response.get("embedding", [])

        data = [{
            "object": "embedding",
            "index": 0,
            "embedding": embedding_list
        }]

        result = {
            "object": "list",
            "data": data,
            "model": request.model,
            "usage": {"prompt_tokens": 5, "total_tokens": 5}  # Example token counts
        }

        return result
    except ValidationError as e:
        raise HTTPException(status_code=422, detail=e.errors())
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@llm.options("/api/embeddings")
@llm.options("/v1/embeddings")
async def options_embedding():
    """Answer OPTIONS for the embeddings endpoints with the allowed methods."""
    allowed_methods = "OPTIONS, POST"
    response_headers = {
        "Allow": allowed_methods,
        "Content-Type": "application/json",
        "Access-Control-Allow-Methods": allowed_methods,
        "Access-Control-Allow-Headers": "Content-Type",
    }
    return JSONResponse(content={}, headers=response_headers)
|
||||
|
||||
|
||||
|
||||
|
||||
###### PORTED FROM IGBOT, NEEDS TO BE UPDATED FOR THIS ENVIRONMENT AND MADE ASYNC: #####
|
||||
|
||||
def query_gpt4(llmPrompt: Union[List, None] = None, system_msg: str = "", user_msg: str = "", max_tokens: int = 150, max_attempts: int = 3):
    """Query GPT-4 and return the text of the first choice.

    Args:
        llmPrompt: Ready-made message list. When empty or ``None``, a
            system/user pair is built from ``system_msg`` and ``user_msg``.
        system_msg: System message used when ``llmPrompt`` is not given.
        user_msg: User message used when ``llmPrompt`` is not given.
        max_tokens: Completion token limit.
        max_attempts: How many times the request is made when the first
            choice carries no message content.

    Returns:
        The message content, or ``""`` when no content could be obtained.
    """
    # The retry used to be a recursive call that passed max_tokens in the
    # system_msg position, discarded its result, and could recurse without
    # bound. It is now a bounded loop whose result is returned.
    messages = llmPrompt if llmPrompt else [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_msg}
    ]
    LLM = OpenAI(api_key=OPENAI_API_KEY)
    attempts = max(1, max_attempts)

    for attempt in range(1, attempts + 1):
        response = LLM.chat.completions.create(
            model="gpt-4",
            messages=messages,
            max_tokens=max_tokens
        )
        if not (hasattr(response, "choices") and response.choices):
            DEBUG(f"No content found in message string: {response}")
            return ""

        first_choice = response.choices[0]
        if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
            return first_choice.message.content

        DEBUG("No content attribute in the first choice's message")
        DEBUG(f"No content found in message string: {response.choices}")
        if attempt < attempts:
            DEBUG("Trying again!")

    return ""
|
||||
|
||||
def llava(image_base64, prompt):
    """Describe an image with the local ``llava`` model.

    Args:
        image_base64: Image data passed to Ollama in the ``images`` list.
        prompt: Instruction appended to the fixed chat preamble.

    Returns:
        The model's text, or ``""`` when the text contains "pass"
        (case-insensitive).
    """
    # This function is synchronous, so it needs the blocking client. The
    # module-level `Ollama` name is the AsyncClient, whose generate() returns
    # a coroutine that cannot be subscripted.
    VISION_LLM = ollama.Client(host='http://localhost:11434')
    response = VISION_LLM.generate(
        model='llava',
        prompt=f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
        images=[image_base64]
    )
    DEBUG(response)
    return "" if "pass" in response["response"].lower() else response["response"]
|
||||
|
||||
def _first_choice_content(response):
    """Return the first choice's message content, or None when it is missing."""
    if response and response.choices:
        first_choice = response.choices[0]
        if first_choice.message and first_choice.message.content:
            return first_choice.message.content
    return None


def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
    """Generate a caption or comment for an image in two GPT-4 Vision passes.

    The first pass produces a draft. The second pass replays the
    conversation with the draft and asks for a refined, output-only version.
    If either answer contains ``"PASS"`` the result is ``""``.

    Args:
        image_base64: Base64 image data embedded in a ``data:`` URL.
        prompt_sys: Text appended to the fixed system preamble.
        prompt_usr: Text of the user message sent with the image.
        max_tokens: Completion token limit for each pass.

    Returns:
        The refined text, or ``""`` when the model answered ``PASS``.
    """
    VISION_LLM = OpenAI(api_key=OPENAI_API_KEY)
    base_messages = [
        {
            "role": "system",
            "content": f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt_sys}",
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": f"{prompt_usr}"},
                {"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}}
            ],
        }
    ]
    refine_request = {
        "role": "user",
        "content": "Please refine it, and remember to ONLY include the caption or comment, nothing else! That means no preface, no postscript, no notes, no reflections, and not even any acknowledgment of this follow-up message. I need to be able to use your output directly on social media. Do include emojis though."
    }

    # As before, this retries until a usable answer arrives. It is now a
    # loop rather than recursion, so the call stack does not grow.
    while True:
        response_1 = VISION_LLM.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=base_messages,
            max_tokens=max_tokens,
            stream=False
        )
        comment_content = _first_choice_content(response_1)
        if comment_content:
            if "PASS" in comment_content:
                return ""
            DEBUG(f"Generated comment: {comment_content}")

            response_2 = VISION_LLM.chat.completions.create(
                model="gpt-4-vision-preview",
                messages=base_messages + [
                    {"role": "assistant", "content": comment_content},
                    refine_request,
                ],
                max_tokens=max_tokens,
                stream=False
            )
            final_content = _first_choice_content(response_2)
            if final_content:
                DEBUG(f"Generated comment: {final_content}")
                return "" if "PASS" in final_content else final_content

        DEBUG("Vision response did not contain expected data.")
        DEBUG(f"Vision response: {response_1}")
        # asyncio.sleep() was called here without await in a synchronous
        # function, so no delay occurred; time.sleep performs the wait.
        time.sleep(15)
|
524
sijapi/routers/locate.py
Normal file
524
sijapi/routers/locate.py
Normal file
|
@ -0,0 +1,524 @@
|
|||
from fastapi import APIRouter, HTTPException, Query
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
import pytz
|
||||
import traceback
|
||||
from datetime import datetime, timezone
|
||||
from typing import Union, List
|
||||
import asyncio
|
||||
import pytz
|
||||
import folium
|
||||
import time as timer
|
||||
from pathlib import Path
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Any, Dict, List, Union
|
||||
from datetime import datetime, timedelta, time
|
||||
from sijapi import LOCATION_OVERRIDES, TZ
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi.utilities import get_db_connection, haversine, localize_dt
|
||||
# from osgeo import gdal
|
||||
# import elevation
|
||||
|
||||
# Router that collects the location and map endpoints defined in this module.
locate = APIRouter()
|
||||
|
||||
class Location(BaseModel):
    """A geographic position at a point in time, with optional address data."""
    # Required position and timestamp.
    latitude: float
    longitude: float
    datetime: datetime
    # Optional height information.
    elevation: Optional[float] = None
    altitude: Optional[float] = None
    # Optional postal address parts.
    zip: Optional[str] = None
    street: Optional[str] = None
    city: Optional[str] = None
    state: Optional[str] = None
    country: Optional[str] = None
    # Free-form extra data (fetch_locations stores action/device details here).
    context: Optional[Dict[str, Any]] = None

    class Config:
        # Serialise datetimes as ISO 8601 strings.
        json_encoders = {datetime: lambda value: value.isoformat()}
|
||||
|
||||
|
||||
|
||||
|
||||
def reverse_geocode(latitude, longitude) -> Optional[Dict]:
    """Look up the address for a coordinate pair through Nominatim.

    Args:
        latitude: Latitude of the point.
        longitude: Longitude of the point.

    Returns:
        A flat dict with the place fields (``latitude``, ``longitude``,
        ``class``, ``type``, ``name``, ``display_name``, ``boundingbox``)
        plus the address fields when the response has an ``address`` object.
        Missing values default to ``""``. Returns ``None`` when the request
        fails or times out.
    """
    url = f"https://nominatim.openstreetmap.org/reverse?format=json&lat={latitude}&lon={longitude}"

    INFO(f"Calling Nominatim API at {url}")

    headers = {
        'User-Agent': 'sij.law/1.0 (sij@sij.law)',  # replace with your app name and email
    }

    try:
        # Bound the wait: without a timeout a stalled connection blocks the
        # caller indefinitely. A timeout is a RequestException, so it is
        # handled below.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise an exception for unsuccessful requests

        data = response.json()

        result = {
            "latitude": data.get("lat", ""),
            "longitude": data.get("lon", ""),
            "class": data.get("class", ""),
            "type": data.get("type", ""),
            "name": data.get("name", ""),
            "display_name": data.get("display_name", ""),
            "boundingbox": data.get("boundingbox", [])
        }

        if "address" in data:
            address = data["address"]
            address_fields = (
                "amenity", "house_number", "road", "quarter", "neighbourhood",
                "suburb", "county", "city", "state", "postcode", "country",
                "country_code",
            )
            result.update({field: address.get(field, "") for field in address_fields})
        INFO(f"{result}")
        return result

    except requests.exceptions.RequestException as e:
        ERR(f"Error: {e}")
        return None
|
||||
|
||||
|
||||
## NOT YET IMPLEMENTED
|
||||
async def geocode(zip_code: Optional[str] = None, latitude: Optional[float] = None, longitude: Optional[float] = None, city: Optional[str] = None, state: Optional[str] = None, country_code: str = 'US') -> Optional[Location]:
    """Find the nearest stored location matching the given hints.

    At least one of these must be supplied: a latitude/longitude pair, a zip
    code, or a city/state pair. The supplied hints are OR-ed together and
    the closest row by distance is returned.

    Returns:
        A ``Location``, or ``None`` when the hints are insufficient or no
        row matches.

    Raises:
        Exception: Generic error when the lookup itself fails.
    """
    # Fixes relative to the earlier draft: the connection is awaited (as it
    # is elsewhere in this module); placeholders are numbered from the
    # parameters actually supplied; `result.get['datetime']` is a normal
    # lookup; and "no match" returns None instead of building an empty
    # Location, which cannot validate.
    # NOTE(review): table and column names (Locations, geom) are kept from
    # the draft and differ from the other queries here -- confirm the schema.
    if (latitude is None or longitude is None) and (zip_code is None) and (city is None or state is None):
        ERR(f"Must provide sufficient information for geocoding!")
        return None

    params = []

    def bind(value):
        """Register a query parameter and return its ``$n`` placeholder."""
        params.append(value)
        return f"${len(params)}"

    conditions = []
    if latitude is not None and longitude is not None:
        point = f"ST_SetSRID(ST_MakePoint({bind(longitude)}, {bind(latitude)}), 4326)"
        distance_expr = f"ST_Distance(geom, {point})"
        # NOTE(review): 50000 was commented as "50 km radius"; the unit
        # depends on the type of `geom` -- confirm.
        conditions.append(f"ST_DWithin(geom, {point}, 50000)")
    else:
        # No reference point: keep the column so ORDER BY stays valid.
        distance_expr = "NULL::double precision"

    if zip_code:
        conditions.append(f"(zip = {bind(zip_code)} AND country = {bind(country_code)})")

    if city and state:
        conditions.append(
            f"(city ILIKE {bind(city)} AND state ILIKE {bind(state)} AND country = {bind(country_code)})"
        )

    query = (
        "SELECT id, street, city, state, country, latitude, longitude, zip, "
        f"elevation, datetime, date, {distance_expr} AS distance FROM Locations"
    )
    if conditions:
        query += " WHERE " + " OR ".join(conditions)
    query += " ORDER BY distance LIMIT 1;"

    DEBUG(f"Executing query: {query} with params: {params}")

    try:
        conn = await get_db_connection()
        try:
            result = await conn.fetchrow(query, *params)
        finally:
            # Close the connection even when the query fails.
            await conn.close()

        if not result:
            DEBUG("No location found with provided parameters.")
            return None

        location_info = Location(
            latitude=result['latitude'],
            longitude=result['longitude'],
            datetime=result['datetime'],
            zip=result['zip'],
            street=result.get('street', ''),
            city=result['city'],
            state=result['state'],
            country=result['country'],
            elevation=result.get('elevation', 0),
            # Location has no distance field; keep the value in context.
            context={'distance': result.get('distance')},
        )
        DEBUG(f"Found location: {location_info}")
        return location_info

    except Exception as e:
        ERR(f"Error occurred: {e}")
        raise Exception("An error occurred while processing your request") from e
|
||||
|
||||
def find_override_locations(lat: float, lon: float) -> Optional[str]:
    """Return the name of the nearest override location that covers a point.

    Entries are read from the JSON file at ``LOCATION_OVERRIDES``. An entry
    covers the point when the haversine distance to it is within the entry's
    ``radius``. Among covering entries the closest one wins.

    Returns:
        The winning entry's ``name``, or ``None`` when none covers the point.
    """
    with open(LOCATION_OVERRIDES, 'r') as file:
        overrides = json.load(file)

    best_name = None
    best_distance = float('inf')

    for entry in overrides:
        distance = haversine(lat, lon, entry.get("latitude"), entry.get("longitude"))
        within_radius = distance <= entry.get("radius")
        if within_radius and distance < best_distance:
            best_distance = distance
            best_name = entry.get("name")

    return best_name
|
||||
|
||||
def get_elevation(latitude, longitude):
    """Fetch the elevation of a point from the Open-Elevation API.

    Args:
        latitude: Latitude of the point.
        longitude: Longitude of the point.

    Returns:
        The ``elevation`` value of the first result, or ``None`` when the
        request fails, times out, or returns no results.
    """
    url = "https://api.open-elevation.com/api/v1/lookup"

    payload = {
        "locations": [
            {
                "latitude": latitude,
                "longitude": longitude
            }
        ]
    }

    try:
        # Bound the wait so a stalled service cannot block the caller. A
        # timeout is a RequestException, so it is handled below.
        response = requests.post(url, json=payload, timeout=10)
        response.raise_for_status()  # Raise an exception for unsuccessful requests

        data = response.json()

        # Guard against a missing or empty results list, which would
        # otherwise raise IndexError.
        if "results" in data and data["results"]:
            return data["results"][0]["elevation"]
        return None

    except requests.exceptions.RequestException as e:
        ERR(f"Error: {e}")
        return None
|
||||
|
||||
|
||||
async def fetch_locations(start: datetime, end: datetime = None) -> List[Location]:
    """Fetch recorded locations in a time range, newest first.

    Args:
        start: Start of the range.
        end: End of the range. When omitted, the range ends at 23:59:59 on
            the start day. When both ``start`` and ``end`` fall exactly on
            midnight, the end is extended to 23:59:59 of its day.

    Returns:
        ``Location`` objects for rows with non-null coordinates. When the
        range is empty and covers a single day, the most recent location
        before ``start`` is returned instead, if one exists.
    """
    start_datetime = localize_dt(start)
    if end is None:
        end_datetime = localize_dt(start_datetime.replace(hour=23, minute=59, second=59))
    else:
        end_datetime = localize_dt(end)
        if start_datetime.time() == datetime.min.time() and end.time() == datetime.min.time():
            end_datetime = end_datetime.replace(hour=23, minute=59, second=59)

    DEBUG(f"Fetching locations between {start_datetime} and {end_datetime}")

    # Column list shared by the range query and the fallback query.
    select_clause = '''
        SELECT id, datetime,
        ST_X(ST_AsText(location)::geometry) AS longitude,
        ST_Y(ST_AsText(location)::geometry) AS latitude,
        ST_Z(ST_AsText(location)::geometry) AS elevation,
        city, state, zip, street,
        action, device_type, device_model, device_name, device_os
        FROM locations
    '''

    locations = []
    conn = await get_db_connection()
    try:
        # Check for records within the specified datetime range
        range_locations = await conn.fetch(select_clause + '''
            WHERE datetime >= $1 AND datetime <= $2
            ORDER BY datetime DESC
        ''', start_datetime.replace(tzinfo=None), end_datetime.replace(tzinfo=None))

        DEBUG(f"Range locations query returned: {range_locations}")
        locations.extend(range_locations)

        if not locations and (end is None or start_datetime.date() == end.date()):
            location_data = await conn.fetchrow(select_clause + '''
                WHERE datetime < $1
                ORDER BY datetime DESC
                LIMIT 1
            ''', start_datetime.replace(tzinfo=None))

            DEBUG(f"Fallback query returned: {location_data}")
            if location_data:
                locations.append(location_data)
    finally:
        # Release the connection even when a query raises.
        await conn.close()

    DEBUG(f"Locations found: {locations}")

    # Sort location_data based on the datetime field in descending order
    sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True)

    # Create Location objects directly from the location data
    location_objects = [Location(
        latitude=loc['latitude'],
        longitude=loc['longitude'],
        datetime=loc['datetime'],
        elevation=loc.get('elevation'),
        city=loc.get('city'),
        state=loc.get('state'),
        zip=loc.get('zip'),
        street=loc.get('street'),
        context={
            'action': loc.get('action'),
            'device_type': loc.get('device_type'),
            'device_model': loc.get('device_model'),
            'device_name': loc.get('device_name'),
            'device_os': loc.get('device_os')
        }
    ) for loc in sorted_locations if loc['latitude'] is not None and loc['longitude'] is not None]

    return location_objects if location_objects else []
|
||||
|
||||
# Function to fetch the last location before the specified datetime
|
||||
async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
    """Return the most recent recorded location strictly before a moment.

    Args:
        datetime: Upper bound (exclusive). It is localized with
            ``localize_dt`` and compared with its tzinfo stripped.

    Returns:
        The matching ``Location``, or ``None`` when there is no earlier row.
    """
    # The parameter name shadows the datetime class inside this function;
    # it is kept for interface compatibility.
    datetime = localize_dt(datetime)

    DEBUG(f"Fetching last location before {datetime}")
    conn = await get_db_connection()
    try:
        location_data = await conn.fetchrow('''
            SELECT id, datetime,
            ST_X(ST_AsText(location)::geometry) AS longitude,
            ST_Y(ST_AsText(location)::geometry) AS latitude,
            ST_Z(ST_AsText(location)::geometry) AS elevation,
            city, state, zip, street, country,
            action
            FROM locations
            WHERE datetime < $1
            ORDER BY datetime DESC
            LIMIT 1
        ''', datetime.replace(tzinfo=None))
    finally:
        # Release the connection even when the query raises.
        await conn.close()

    if location_data:
        DEBUG(f"Last location found: {location_data}")
        return Location(**location_data)
    else:
        DEBUG("No location found before the specified datetime")
        return None
|
||||
|
||||
|
||||
|
||||
@locate.get("/map/start_date={start_date_str}&end_date={end_date_str}", response_class=HTMLResponse)
async def generate_map_endpoint(start_date_str: str, end_date_str: str):
    """Serve an HTML map of the locations recorded between two dates.

    Both path parameters are parsed with ``localize_dt``; a ``ValueError``
    from either one is reported as HTTP 400.
    """
    try:
        range_start = localize_dt(start_date_str)
        range_end = localize_dt(end_date_str)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid date format")

    page = await generate_map(range_start, range_end)
    return HTMLResponse(content=page)
|
||||
|
||||
|
||||
@locate.get("/map", response_class=HTMLResponse)
async def generate_alltime_map_endpoint():
    """Serve an HTML map covering 2022-01-01 through the current moment."""
    try:
        earliest = localize_dt(datetime.fromisoformat("2022-01-01"))
        latest = localize_dt(datetime.now())
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid date format")

    page = await generate_map(earliest, latest)
    return HTMLResponse(content=page)
|
||||
|
||||
|
||||
async def generate_map(start_date: datetime, end_date: datetime):
    """Build a folium map with one marker per location in the date range.

    Args:
        start_date: Start of the range handed to ``fetch_locations``.
        end_date: End of the range handed to ``fetch_locations``.

    Returns:
        The rendered map as an HTML string.

    Raises:
        HTTPException: 404 when ``fetch_locations`` returns nothing.
    """
    locations = await fetch_locations(start_date, end_date)
    if not locations:
        raise HTTPException(status_code=404, detail="No locations found for the given date range")

    # Center the map on the first location returned.
    map_center = [locations[0].latitude, locations[0].longitude]
    m = folium.Map(location=map_center, zoom_start=5)

    # Add a marker for each location.
    for loc in locations:
        folium.Marker(
            location=[loc.latitude, loc.longitude],
            popup=f"{loc.city}, {loc.state}<br>Elevation: {loc.elevation}m<br>Date: {loc.datetime}",
            tooltip=f"{loc.city}, {loc.state}"
        ).add_to(m)

    # Render in memory rather than round-tripping through a shared "map.html"
    # in the working directory: concurrent requests would overwrite each
    # other's file, and reading it back depended on the locale's encoding.
    return m.get_root().render()
|
||||
|
||||
|
||||
async def post_location(location: Location):
    """Insert one location row and return the stored values as a dict.

    Device/action metadata is read from ``location.context`` with defaults
    ('manual' for the action, 'Unknown' for the device fields).

    Returns:
        A dict of the persisted fields on success, or ``None`` when anything
        fails (the error and traceback are logged).
    """
    DEBUG(f"post_location called with {location.datetime}")
    conn = await get_db_connection()
    try:
        context = location.context or {}
        action = context.get('action', 'manual')
        device_type = context.get('device_type', 'Unknown')
        device_model = context.get('device_model', 'Unknown')
        device_name = context.get('device_name', 'Unknown')
        device_os = context.get('device_os', 'Unknown')

        # Parse and localize the datetime
        localized_datetime = localize_dt(location.datetime)

        await conn.execute('''
            INSERT INTO locations (datetime, location, city, state, zip, street, action, device_type, device_model, device_name, device_os)
            VALUES ($1, ST_SetSRID(ST_MakePoint($2, $3, $4), 4326), $5, $6, $7, $8, $9, $10, $11, $12, $13)
        ''', localized_datetime, location.longitude, location.latitude, location.elevation, location.city, location.state, location.zip, location.street, action, device_type, device_model, device_name, device_os)
        INFO(f"Successfully posted location: {location.latitude}, {location.longitude} on {localized_datetime}")
        return {
            'datetime': localized_datetime,
            'latitude': location.latitude,
            'longitude': location.longitude,
            'city': location.city,
            'state': location.state,
            'zip': location.zip,
            'street': location.street,
            'elevation': location.elevation,
            'action': action,
            'device_type': device_type,
            'device_model': device_model,
            'device_name': device_name,
            'device_os': device_os
        }
    except Exception as e:
        ERR(f"Error posting location {e}")
        ERR(traceback.format_exc())
        return None
    finally:
        # Previously the connection was closed only on the success path, so
        # every failed insert leaked a connection.
        await conn.close()
|
||||
|
||||
@locate.post("/locate")
async def post_locate_endpoint(locations: Union[Location, List[Location]]):
    """Store one location or a list of locations.

    Missing fields are filled in before storage: the timestamp defaults to the
    current UTC time, the elevation to the reported altitude or a
    ``get_elevation`` lookup, and the context to a default device description.

    Returns:
        A dict with a message and the list of entries that were stored.
    """
    results = []
    batch = [locations] if isinstance(locations, Location) else locations

    for entry in batch:
        if not entry.datetime:
            entry.datetime = datetime.now(timezone.utc).isoformat()

        if not entry.elevation:
            if entry.altitude:
                entry.elevation = entry.altitude
            else:
                entry.elevation = get_elevation(entry.latitude, entry.longitude)

        # Fall back to a default context when the client sent none.
        if not entry.context:
            entry.context = {
                "action": "manual",
                "device_type": "Pythonista",
                "device_model": "Unknown",
                "device_name": "Unknown",
                "device_os": "Unknown"
            }

        DEBUG(f"datetime before localization: {entry.datetime}")
        # Convert the timestamp into a timezone-aware datetime object.
        entry.datetime = localize_dt(entry.datetime)
        DEBUG(f"datetime after localization: {entry.datetime}")

        stored = await post_location(entry)
        if stored:
            results.append({"location_data": stored})

        # Short pause between inserts; asyncio.sleep keeps the loop responsive.
        await asyncio.sleep(0.1)

    return {"message": "Locations and weather updated", "results": results}
|
||||
|
||||
|
||||
|
||||
# GET endpoint to fetch the last location before the specified datetime
|
||||
# @locate.get("/last_location", response_model=Union[Location, Dict[str, str]])
|
||||
@locate.get("/locate", response_model=List[Location])
async def get_last_location() -> JSONResponse:
    """Return the latest stored location before the current time.

    Responds with the location as JSON (datetime in ISO format), or with a
    404 message when no earlier location exists.
    """
    query_datetime = datetime.now(TZ)
    DEBUG(f"Query_datetime: {query_datetime}")

    location = await fetch_last_location_before(query_datetime)
    if not location:
        return JSONResponse(content={"message": "No location found before the specified datetime"}, status_code=404)

    DEBUG(f"location: {location}")
    payload = location.model_dump()
    # The datetime is replaced with its ISO string before JSON encoding.
    payload["datetime"] = location.datetime.isoformat()
    return JSONResponse(content=payload)
|
||||
|
||||
|
||||
@locate.get("/locate/{datetime_str}", response_model=List[Location])
async def get_locate(datetime_str: str, all: bool = False):
    """Return stored location data for the given date/time.

    Args:
        datetime_str: Timestamp string parsed by ``localize_dt``.
        all: When true, return every location found; otherwise only the first.

    Raises:
        HTTPException: 400 when the timestamp cannot be parsed, 404 when no
            locations are found.
    """
    try:
        date_time = localize_dt(datetime_str)
    except ValueError as e:
        ERR(f"Invalid datetime string provided: {datetime_str}")
        # Returning a list of strings here would fail validation against
        # response_model=List[Location]; report a client error instead.
        raise HTTPException(
            status_code=400,
            detail="ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format.",
        ) from e

    locations = await fetch_locations(date_time)
    if not locations:
        raise HTTPException(status_code=404, detail="No nearby data found for this date and time")

    return locations if all else [locations[0]]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
future_elevation = """
|
||||
def get_elevation_srtm(latitude, longitude, srtm_file):
|
||||
try:
|
||||
# Open the SRTM dataset
|
||||
dataset = gdal.Open(srtm_file)
|
||||
|
||||
# Get the geotransform and band information
|
||||
geotransform = dataset.GetGeoTransform()
|
||||
band = dataset.GetRasterBand(1)
|
||||
|
||||
# Calculate the pixel coordinates from the latitude and longitude
|
||||
x = int((longitude - geotransform[0]) / geotransform[1])
|
||||
y = int((latitude - geotransform[3]) / geotransform[5])
|
||||
|
||||
# Read the elevation value from the SRTM dataset
|
||||
elevation = band.ReadAsArray(x, y, 1, 1)[0][0]
|
||||
|
||||
# Close the dataset
|
||||
dataset = None
|
||||
|
||||
return elevation
|
||||
|
||||
except Exception as e:
|
||||
ERR(f"Error: {e}")
|
||||
return None
|
||||
"""
|
||||
|
||||
def get_elevation2(latitude: float, longitude: float) -> float:
    """Look up the elevation in meters for a coordinate via the USGS point query service.

    Args:
        latitude: Latitude sent as the ``y`` query parameter.
        longitude: Longitude sent as the ``x`` query parameter.

    Returns:
        The elevation reported by the service, as a float.

    Raises:
        RuntimeError: On any failure (network error, HTTP error status,
            unexpected response shape); the original exception is chained.
    """
    url = f"https://nationalmap.gov/epqs/pqs.php?x={longitude}&y={latitude}&units=Meters&output=json"

    try:
        # A timeout keeps a stalled service from blocking the caller forever.
        response = requests.get(url, timeout=10)
        # Surface HTTP errors directly instead of as a confusing JSON failure.
        response.raise_for_status()
        data = response.json()
        elevation = data["USGS_Elevation_Point_Query_Service"]["Elevation_Query"]["Elevation"]
        return float(elevation)
    except Exception as e:
        # Handle exceptions (e.g., network errors, API changes) appropriately
        raise RuntimeError(f"Error getting elevation data: {str(e)}") from e
|
1097
sijapi/routers/note.py
Normal file
1097
sijapi/routers/note.py
Normal file
File diff suppressed because it is too large
Load diff
16
sijapi/routers/rag.py
Normal file
16
sijapi/routers/rag.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
'''
|
||||
IN DEVELOPMENT: Retrieval-Augmented Generation module.
|
||||
NOTES: Haven't yet decided whether this should depend on the Obsidian and Chat modules, whether they should depend on it, or some mix of the two.
|
||||
'''
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
rag = APIRouter()
|
||||
|
||||
# The decorator marker was missing, so the route was built but never attached
# to the handler below.
@rag.get("/rag/search")
async def rag_search_endpoint(query: str, scope: str):
    """Placeholder for RAG search; not implemented yet (returns nothing)."""
    pass
|
||||
|
||||
# The decorator marker was missing, so the route was built but never attached
# to the handler below.
@rag.post("/rag/embed")
async def rag_upload_endpoint(path: str):
    """Placeholder for RAG embedding of a path; not implemented yet (returns nothing)."""
    pass
|
440
sijapi/routers/sd.py
Normal file
440
sijapi/routers/sd.py
Normal file
|
@ -0,0 +1,440 @@
|
|||
'''
|
||||
Image generation module using StableDiffusion and similar models by way of ComfyUI.
|
||||
DEPENDS ON:
|
||||
LLM module
|
||||
COMFYUI_URL, COMFYUI_DIR, COMFYUI_OUTPUT_DIR, HOST_PORT, TS_SUBNET, TS_ADDRESS, DATA_DIR, CONFIG_DIR, SD_IMAGE_DIR, SD_WORKFLOWS_DIR, LOCAL_HOSTS, BASE_URL, PHOTOPRISM_USER*, PHOTOPRISM_URL*, PHOTOPRISM_PASS*
|
||||
*unimplemented.
|
||||
'''
|
||||
from fastapi import APIRouter, Request, Response
|
||||
from starlette.datastructures import Address
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from aiohttp import ClientSession, ClientTimeout
|
||||
import aiofiles
|
||||
from PIL import Image
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
import json
|
||||
import ipaddress
|
||||
import socket
|
||||
import subprocess
|
||||
import os, re
|
||||
import random
|
||||
from io import BytesIO
|
||||
import base64
|
||||
import asyncio
|
||||
import shutil
|
||||
# from photoprism.Session import Session
|
||||
# from photoprism.Photo import Photo
|
||||
# from webdav3.client import Client
|
||||
from sijapi.routers.llm import query_ollama
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import COMFYUI_URL, COMFYUI_LAUNCH_CMD, COMFYUI_DIR, COMFYUI_OUTPUT_DIR, HOST_PORT, TS_SUBNET, CONFIG_DIR, SD_IMAGE_DIR, SD_WORKFLOWS_DIR, LOCAL_HOSTS, BASE_URL
|
||||
|
||||
# Router that the image-generation endpoints below are attached to.
sd = APIRouter()

# URL path segment used when building image links in sd_endpoint.
uri_path = "img"

# Scene/workflow configuration is loaded once, at import time.
CONFIG_PATH = CONFIG_DIR / 'sd.json'
with open(CONFIG_PATH, 'r') as config_file:
    CONFIG = json.load(config_file)

# Random per-process identifier sent to ComfyUI as "client_id" in queue_prompt.
CLIENT_ID = str(uuid.uuid4())
|
||||
|
||||
@sd.post("/sd")
@sd.post("/v1/images/generations")
async def sd_endpoint(request: Request):
    """Generate an image from a JSON request and return a URL for it.

    Reads ``prompt``, ``model``, ``size``, ``style`` and ``earlyurl`` from the
    JSON body, runs ``workflow`` and builds the image URL according to where
    the client is connecting from.

    Returns:
        ``{"image_url": ...}`` on success, a 400 JSON error when the prompt is
        missing or not a string, or the error response produced by
        ``workflow`` (for example for an invalid size).
    """
    request_data = await request.json()
    prompt = request_data.get("prompt")
    # workflow() concatenates the prompt with other strings, so anything but a
    # string previously surfaced as an unhandled 500.
    if not isinstance(prompt, str):
        return JSONResponse({"error": "A text 'prompt' is required."}, status_code=400)
    model = request_data.get("model")
    size = request_data.get("size")
    style = request_data.get("style") or "photorealistic"
    earlyurl = request_data.get("earlyurl", False)

    filename = await workflow(prompt=prompt, scene=model, size=size, style=style, earlyurl=earlyurl)
    # workflow() returns a JSONResponse on bad input; pass it through instead
    # of interpolating the response object into a URL.
    if isinstance(filename, JSONResponse):
        return filename

    client_ip = ipaddress.ip_address(request.client.host)
    if client_ip in LOCAL_HOSTS:
        url = f"http://localhost:{HOST_PORT}/{uri_path}/{filename}"
    elif client_ip in TS_SUBNET:
        url = f"{BASE_URL}/{uri_path}/{filename}"
    else:
        url = f"We are watching you {request.client.host}"
    return JSONResponse({"image_url": url})
|
||||
|
||||
async def workflow(prompt: str, scene: str = None, size: str = None, style: str = "photorealistic", earlyurl: bool = False, destination_path: str = None):
    """Run the full image-generation pipeline for one prompt.

    Picks a scene (by name, else by trigger words), asks the LLM for an image
    concept, fills a randomly chosen workflow for that scene, queues it, and
    saves the result as a JPEG.

    Returns:
        The value of ``get_return_path`` for the destination file, or a
        ``JSONResponse`` describing the error when ``size`` is malformed.
        With ``earlyurl`` the path is returned before the image exists.
    """
    scene_data = get_scene(scene) or get_matching_scene(prompt)

    # LLM_PrePrompt serves as a preface to the prompt
    prompt = scene_data['LLM_PrePrompt'] + prompt
    image_concept = await query_ollama(usr=prompt, sys=scene_data['LLM_SysMsg'], max_tokens=100)
    DEBUG(f"sd_endpoint - image_concept: {image_concept}")

    scene_workflow = random.choice(scene_data['workflows'])
    if not size:
        size = scene_workflow.get('size', '1024x1024')
    try:
        width, height = (int(part) for part in size.split('x'))
    except ValueError:
        return JSONResponse({"error": "Invalid size format. Please use 'widthxheight'."})

    graph_file = Path(SD_WORKFLOWS_DIR) / scene_workflow['workflow']
    graph = json.loads(graph_file.read_text())

    positive_prompt = scene_data['API_PPrompt'] + image_concept + "; ((" + scene_data['triggers'][0] + ")) "
    post = {
        "API_PPrompt": positive_prompt,
        "API_SPrompt": scene_data['API_SPrompt'],
        "API_NPrompt": scene_data['API_NPrompt'],
        "width": width,
        "height": height
    }
    saved_file_key = update_prompt(workflow=graph, post=post)
    DEBUG(f"Saved file key: {saved_file_key}")

    set_presets(graph, scene_workflow.get('preset_values'))
    DEBUG(f"Workflow_data: {graph}")
    prompt_id = await queue_prompt(graph)

    if destination_path is None:
        destination_path = SD_IMAGE_DIR / f"{prompt_id}"
    destination_path = Path(destination_path).with_suffix(".jpg")

    longest_side = max(width, height)
    if earlyurl:
        # Fire and forget: the caller gets the path before the image is saved.
        asyncio.create_task(poll_status_and_save(prompt_id, destination_path, longest_side, 90))
    else:
        await poll_status_and_save(prompt_id, destination_path, longest_side, 90)
        await asyncio.sleep(1)
    return get_return_path(destination_path)
|
||||
|
||||
|
||||
def set_presets(workflow_data, preset_values):
    """Write one randomly chosen preset value into a workflow node input.

    ``preset_values`` supplies the target node ('node'), the input name
    ('key') and the candidate list ('values'). The workflow is modified in
    place; nothing is returned. Missing data is only logged.
    """
    if not preset_values:
        DEBUG("No preset_values found")
        return

    node_id = preset_values.get('node')
    input_name = preset_values.get('key')
    candidates = preset_values.get('values')
    if not (node_id and input_name and candidates):
        DEBUG("Required data missing in preset_values")
        return

    chosen = random.choice(candidates)
    if 'inputs' not in workflow_data.get(node_id, {}):
        DEBUG("Node not found in workflow_data")
        return

    workflow_data[node_id]['inputs'][input_name] = chosen
|
||||
|
||||
|
||||
def get_return_path(destination_path):
    """Return the bare file name for images inside SD_IMAGE_DIR, else the full path string."""
    inside_image_dir = destination_path.parent.samefile(Path(SD_IMAGE_DIR))
    return destination_path.name if inside_image_dir else str(destination_path)
|
||||
|
||||
# This allows selected scenes by name
|
||||
def get_scene(scene):
    """Return the first configured scene whose 'scene' name equals ``scene``, or ``None``."""
    return next((entry for entry in CONFIG['scenes'] if entry['scene'] == scene), None)
|
||||
|
||||
# This returns the scene with the most trigger words present in the provided prompt, or otherwise if none match it returns the first scene in the array - meaning the first should be considered the default scene.
|
||||
def get_matching_scene(prompt):
    """Pick the scene whose trigger words appear most often in ``prompt``.

    Matching is done against the lower-cased prompt. Ties go to the scene
    listed first; with no match at all, the first configured scene is used
    as the default.
    """
    lowered = prompt.lower()
    scenes = CONFIG['scenes']
    best_scene, best_hits = None, 0
    for candidate in scenes:
        hits = len([trigger for trigger in candidate['triggers'] if trigger in lowered])
        if hits > best_hits:
            best_scene, best_hits = candidate, hits
    # Fall back on the first scene, which should be an appropriate default.
    return best_scene or scenes[0]
|
||||
|
||||
|
||||
async def poll_status_and_save(prompt_id, destination_path: Path, max_size=1440, quality=90):
    """Wait for a job to finish, then store its first image as a JPEG.

    Args:
        prompt_id: Identifier of the queued job.
        destination_path: Where the JPEG should end up.
        max_size: Longest allowed side, forwarded to ``save_as_jpg``.
        quality: JPEG quality, forwarded to ``save_as_jpg``.

    Returns:
        The path of the saved JPEG as a string, or ``None`` when the job
        output contains no images.

    Raises:
        Exception: Wrapping any failure while polling, reading or saving.
    """
    try:
        status_data = await poll_status(prompt_id)
        save_image_key = next(
            (key for key, value in status_data.get("outputs", {}).items() if "images" in value),
            None,
        )
        if not save_image_key:
            return None

        image_data = await get_image(status_data, save_image_key)
        new_path = await save_as_jpg(image_data, prompt_id, max_size, quality, destination_path)
        if new_path is None:
            # save_as_jpg logs and returns None on failure; fail clearly here
            # rather than with a TypeError inside shutil.move.
            raise RuntimeError("save_as_jpg did not produce an image file")

        # save_as_jpg returns a str, so compare as Path objects; comparing a
        # Path to a str is always unequal and triggered a spurious warning
        # plus a move of the file onto itself on every call.
        if destination_path and Path(destination_path) != Path(new_path):
            WARN(f"Unusual behavior in poll_status_and_save and save_as_jpg. Destination path: {destination_path}, new_path returned by save_as_jpg: {new_path}")
            shutil.move(new_path, destination_path)
            return str(destination_path)
        return new_path
    except Exception as e:
        raise Exception(f"Error in poll_status_and_save: {e}") from e
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
async def ensure_comfy():
    """Start ComfyUI in a tmux split if nothing is listening on 127.0.0.1:8188.

    Returns immediately when a TCP connection to port 8188 succeeds.
    Otherwise launches ComfyUI through tmux and waits 15 seconds for it to
    come up. Errors while launching are printed, not raised.
    """
    try:
        with socket.create_connection(("127.0.0.1", 8188), timeout=2):
            print("ComfyUI is already running.")
            return
    except OSError:
        # socket.timeout and ConnectionRefusedError are both OSError
        # subclasses; catching the base class also covers other connection
        # failures (e.g. unreachable network) that mean "not running".
        print("ComfyUI is not running. Starting it now...")

    try:
        # NOTE(review): the launch command and paths are hard-coded for one
        # machine — consider building this from configuration instead.
        tmux_command = (
            "tmux split-window -h "
            "\"source /Users/sij/.zshrc; cd /Users/sij/workshop/ComfyUI; "
            "mamba activate comfyui && "
            "python main.py; exec $SHELL\""
        )
        subprocess.Popen(tmux_command, shell=True)

        print("ComfyUI started in a new tmux session.")

        # Give ComfyUI 15 seconds to start listening.
        await asyncio.sleep(15)

        print("Waited 15 seconds after starting ComfyUI.")

    except Exception as e:
        print(f"Error starting ComfyUI: {e}")
|
||||
|
||||
|
||||
|
||||
|
||||
async def poll_status(prompt_id):
    """Asynchronously poll the job status until it's complete and return the status data.

    Queries ``{COMFYUI_URL}/history/{prompt_id}`` every 10 seconds.

    Returns:
        The history entry for ``prompt_id`` once its status is completed.

    Raises:
        Exception: When the history endpoint answers with a non-200 status.
    """
    # get_running_loop() is the supported way to reach the loop from inside a
    # coroutine.
    loop = asyncio.get_running_loop()
    start_time = loop.time()
    await ensure_comfy()
    async with ClientSession() as session:
        while True:
            elapsed_time = int(loop.time() - start_time)  # elapsed seconds
            # The context manager releases the response after each poll
            # instead of leaving it open until the session closes.
            async with session.get(f"{COMFYUI_URL}/history/{prompt_id}") as response:
                DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds")
                if response.status != 200:
                    raise Exception("Failed to get job status")
                status_data = await response.json()

            job_data = status_data.get(prompt_id, {})
            if job_data.get("status", {}).get("completed", False):
                DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.")
                return job_data
            await asyncio.sleep(10)
|
||||
|
||||
async def get_image(status_data, key):
    """Asynchronously extract the filename and subfolder from the status data and read the file.

    Args:
        status_data: Job data containing an "outputs" mapping.
        key: Output entry whose first image should be read.

    Returns:
        The raw bytes of the image file under COMFYUI_OUTPUT_DIR.

    Raises:
        Exception: When the output lists no images, the image entry has no
            filename, or the file does not exist.
    """
    filename = None
    file_path = None
    try:
        outputs = status_data.get("outputs", {})
        images_info = outputs.get(key, {}).get("images", [])
        if not images_info:
            raise Exception("No images found in the job output.")

        image_info = images_info[0]  # Assuming the first image is the target
        filename = image_info.get("filename")
        if not filename:
            # Without this check os.path.join fails with an opaque TypeError.
            raise KeyError("filename")
        # Treat a missing or null subfolder as the output root.
        subfolder = image_info.get("subfolder") or ""
        file_path = os.path.join(COMFYUI_OUTPUT_DIR, subfolder, filename)

        async with aiofiles.open(file_path, 'rb') as file:
            return await file.read()
    except KeyError as e:
        raise Exception(f"Failed to extract image information due to missing key: {e}") from e
    except FileNotFoundError as e:
        raise Exception(f"File {filename} not found at the expected path {file_path}") from e
|
||||
|
||||
|
||||
async def save_as_jpg(image_data, prompt_id, max_size=2160, quality=80, destination_path: Path = None, keep_original: bool = True):
    """Write image bytes as a PNG, then convert them to a (possibly downscaled) JPEG.

    Args:
        image_data: Raw image bytes.
        prompt_id: Used to name the PNG (and the JPEG when no destination is given)
            inside SD_IMAGE_DIR.
        max_size: Longest allowed side in pixels; larger images are downscaled.
        quality: JPEG quality setting.
        destination_path: Optional target for the JPEG; its suffix is forced to ".jpg".
        keep_original: When false, the intermediate PNG is deleted.

    Returns:
        The JPEG path as a string, or ``None`` when anything fails (the error
        is logged).
    """
    destination_path_png = (SD_IMAGE_DIR / prompt_id).with_suffix(".png")
    if destination_path:
        destination_path_jpg = Path(destination_path).with_suffix(".jpg")
    else:
        destination_path_jpg = (SD_IMAGE_DIR / prompt_id).with_suffix(".jpg")

    DEBUG(f"Constructed jpg destination: {destination_path_jpg}")
    try:
        # Create the output directories from the computed paths. Using
        # destination_path directly raised AttributeError whenever it was
        # None, so every call without a destination returned None.
        destination_path_png.parent.mkdir(parents=True, exist_ok=True)
        destination_path_jpg.parent.mkdir(parents=True, exist_ok=True)
        await asyncio.sleep(1)

        # Save the image as .png
        async with aiofiles.open(destination_path_png, 'wb') as file:
            await file.write(image_data)

        with Image.open(destination_path_png) as img:
            output_img = img
            # Resize image if needed
            if max(img.size) > max_size:
                ratio = max_size / max(img.size)
                new_size = tuple(int(x * ratio) for x in img.size)
                output_img = img.resize(new_size, Image.Resampling.LANCZOS)

            # Convert and save image as .jpg
            output_img.convert('RGB').save(destination_path_jpg, format='JPEG', quality=quality)

        if not keep_original:
            os.remove(destination_path_png)

        return str(destination_path_jpg)

    except Exception as e:
        ERR(f"Error processing image: {e}")
        return None
|
||||
|
||||
|
||||
|
||||
|
||||
# async def upload_and_get_shareable_link(image_path):
|
||||
# try:
|
||||
# Set up the PhotoPrism session
|
||||
# pp_session = Session(PHOTOPRISM_USER, PHOTOPRISM_PASS, PHOTOPRISM_URL, use_https=True)
|
||||
# pp_session.create()
|
||||
|
||||
# Start import
|
||||
# photo = Photo(pp_session)
|
||||
# photo.start_import(path=os.path.dirname(image_path))
|
||||
|
||||
# Give PhotoPrism some time to process the upload
|
||||
# await asyncio.sleep(5)
|
||||
|
||||
# Search for the uploaded photo
|
||||
# photo_name = os.path.basename(image_path)
|
||||
# search_results = photo.search(query=f"name:{photo_name}", count=1)
|
||||
|
||||
# if search_results['photos']:
|
||||
# photo_uuid = search_results['photos'][0]['uuid']
|
||||
# shareable_link = f"https://{PHOTOPRISM_URL}/p/{photo_uuid}"
|
||||
# return shareable_link
|
||||
# else:
|
||||
# ERR("Could not find the uploaded photo details.")
|
||||
# return None
|
||||
# except Exception as e:
|
||||
# ERR(f"Error in upload_and_get_shareable_link: {e}")
|
||||
# return None
|
||||
|
||||
|
||||
|
||||
@sd.get("/image/{prompt_id}")
async def get_image_status(prompt_id: str):
    """Wait for a job, save its image as a JPEG and return the public URL.

    Responds with 202 and the job details when the job output contains no
    images.
    """
    status_data = await poll_status(prompt_id)
    image_node = next(
        (node_id for node_id, output in status_data.get("outputs", {}).items() if "images" in output),
        None,
    )
    if not image_node:
        return JSONResponse(content={"status": "Processing", "details": status_data}, status_code=202)

    image_data = await get_image(status_data, image_node)
    await save_as_jpg(image_data, prompt_id)
    external_url = f"https://api.lone.blue/img/{prompt_id}.jpg"
    return JSONResponse({"image_url": external_url})
|
||||
|
||||
@sd.get("/image-status/{prompt_id}")
async def get_image_processing_status(prompt_id: str):
    """Return the job data from ``poll_status``, or a 500 JSON error if that fails."""
    try:
        body = {"status": "Processing", "details": await poll_status(prompt_id)}
        return JSONResponse(content=body, status_code=200)
    except Exception as err:
        return JSONResponse(content={"error": str(err)}, status_code=500)
|
||||
|
||||
|
||||
|
||||
@sd.options("/v1/images/generations", tags=["generations"])
async def get_generation_options():
    """Describe the request fields accepted by the image generation endpoint."""
    options = {}
    options["model"] = dict(
        description="The model to use for image generation.",
        type="string",
        example="stable-diffusion",
    )
    options["prompt"] = dict(
        description="The text prompt for the image generation.",
        type="string",
        required=True,
        example="A beautiful sunset over the ocean.",
    )
    options["n"] = dict(
        description="The number of images to generate.",
        type="integer",
        default=1,
        example=3,
    )
    options["size"] = dict(
        description="The size of the generated images in 'widthxheight' format.",
        type="string",
        default="1024x1024",
        example="512x512",
    )
    options["style"] = dict(
        description="The style for the generated images.",
        type="string",
        default="photorealistic",
        example="cartoon",
    )
    options["raw"] = dict(
        description="Whether to return raw image data or not.",
        type="boolean",
        default=False,
    )
    options["earlyurl"] = dict(
        description="Whether to return the URL early or wait for the image to be ready.",
        type="boolean",
        default=False,
    )
    return options
|
||||
|
||||
|
||||
async def load_workflow(workflow_path: str, workflow:str):
    """Load a workflow JSON file.

    ``workflow_path`` is used when given; otherwise the file is looked up by
    ``workflow`` name inside SD_WORKFLOWS_DIR, appending ".json" if needed.
    """
    if not workflow_path:
        file_name = workflow if workflow.endswith('.json') else f"{workflow}.json"
        workflow_path = os.path.join(SD_WORKFLOWS_DIR, file_name)
    with open(workflow_path, 'r') as handle:
        return json.load(handle)
|
||||
|
||||
def update_prompt(workflow: dict, post: dict, found_key=None, path=None):
    """Fill placeholders in a workflow graph in place and locate its save node.

    Walks the nested dict/list structure and:
      * replaces the values "API_PPrompt", "API_SPrompt" and "API_NPrompt"
        with the matching entry of ``post`` (empty string when absent);
      * assigns a fresh random integer to every "seed" / "noise_seed" key;
      * writes ``post["width"]`` / ``post["height"]`` into the known
        width- and height-like keys.

    Args:
        workflow: Graph (or sub-structure during recursion) to update.
        post: Replacement values; must contain "width" and "height" when the
            graph has matching keys.
        found_key: Internal one-element accumulator shared across the
            recursion; callers normally omit it.
        path: Internal list of keys leading to ``workflow``, used in error
            messages.

    Returns:
        The key of the last dict visited that has class_type 'SaveImage' and
        filename_prefix 'API_', or ``None`` when there is none.
    """
    # A list literal as the default was shared between calls, so a save node
    # found in one workflow leaked into the result for the next one.
    if found_key is None:
        found_key = [None]
    if path is None:
        path = []

    try:
        if isinstance(workflow, dict):
            for key, value in workflow.items():
                current_path = path + [key]

                if isinstance(value, dict):
                    if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
                        found_key[0] = key
                    update_prompt(value, post, found_key=found_key, path=current_path)
                elif isinstance(value, list):
                    # Recursive call with updated path for each item in a list
                    for index, item in enumerate(value):
                        update_prompt(item, post, found_key, current_path + [str(index)])

                if value in ("API_PPrompt", "API_SPrompt", "API_NPrompt"):
                    workflow[key] = post.get(value, "")
                    DEBUG(f"Updated {value} to: {workflow[key]}")
                elif key == "seed" or key == "noise_seed":
                    workflow[key] = random.randint(1000000000000, 9999999999999)
                    DEBUG(f"Updated seed to: {workflow[key]}")
                elif key in ["width", "max_width", "scaled_width", "side_length"]:
                    workflow[key] = post["width"]
                elif key in ["height", "max_height", "scaled_height"]:
                    workflow[key] = post["height"]

    except Exception as e:
        # str() keeps the message buildable even if a key is not a string.
        ERR(f"Error in update_prompt at path {' -> '.join(map(str, path))}: {e}")
        raise

    return found_key[0]
|
||||
|
||||
async def queue_prompt(prompt: dict):
    """Submit a workflow to ComfyUI and return the id it assigns.

    Args:
        prompt: Workflow graph posted as the "prompt" field.

    Returns:
        The 'prompt_id' value from ComfyUI's JSON reply.

    Raises:
        Exception: When ComfyUI answers with a non-200 status; the status and
            response body are included in the message and logged.
    """
    DEBUG(f"Dict to queue: {prompt}")
    async with ClientSession() as session:
        await ensure_comfy()
        # The context manager releases the response once it has been read.
        async with session.post(f"{COMFYUI_URL}/prompt", json={"prompt": prompt, "client_id": CLIENT_ID}) as response:
            if response.status == 200:
                json_data = await response.json()
                return json_data.get('prompt_id')

            error_message = f"Failed to queue prompt. Status code: {response.status}, Response body: {await response.text()}"
            ERR(error_message)
            raise Exception(error_message)
|
30
sijapi/routers/serve.py
Normal file
30
sijapi/routers/serve.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
'''
|
||||
Web server module. Used by other modules when serving static content is required, e.g. the sd image generation module. Also used to serve PUBLIC_KEY.
|
||||
'''
|
||||
import os
|
||||
from fastapi import APIRouter, Form, HTTPException, Request, Response
|
||||
from fastapi.responses import FileResponse
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from pathlib import Path
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi.utilities import bool_convert, sanitize_filename
|
||||
from sijapi import DATA_DIR, SD_IMAGE_DIR, PUBLIC_KEY
|
||||
|
||||
serve = APIRouter(tags=["public"])
|
||||
|
||||
@serve.get("/pgp")
async def get_pgp():
    """Serve PUBLIC_KEY as a plain-text response."""
    key_response = Response(PUBLIC_KEY, media_type="text/plain")
    return key_response
|
||||
|
||||
@serve.get("/img/{image_name}")
def serve_image(image_name: str):
    """Serve a file from SD_IMAGE_DIR by name.

    Raises:
        HTTPException: 404 when no regular file with that name exists.
    """
    image_path = os.path.join(SD_IMAGE_DIR, image_name)
    # isfile() also rejects directories, which exists() let through. A missing
    # image now yields a real 404 instead of a 200 response carrying an error
    # body.
    if not os.path.isfile(image_path):
        raise HTTPException(status_code=404, detail="Image not found")
    return FileResponse(image_path)
|
211
sijapi/routers/summarize.py
Normal file
211
sijapi/routers/summarize.py
Normal file
|
@ -0,0 +1,211 @@
|
|||
from fastapi import APIRouter, BackgroundTasks, File, Form, HTTPException, UploadFile
|
||||
from fastapi.responses import FileResponse
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import filetype
|
||||
import shutil
|
||||
import os
|
||||
import re
|
||||
from os.path import basename, splitext
|
||||
from datetime import datetime
|
||||
from typing import Optional, Union, List
|
||||
from PyPDF2 import PdfReader
|
||||
from pdfminer.high_level import extract_text as pdfminer_extract_text
|
||||
import pytesseract
|
||||
from pdf2image import convert_from_path
|
||||
import asyncio
|
||||
import html2text
|
||||
import markdown
|
||||
from ollama import Client, AsyncClient
|
||||
from docx import Document
|
||||
|
||||
from sijapi.routers.tts import generate_speech
|
||||
from sijapi.routers.asr import transcribe_audio
|
||||
from sijapi.utilities import sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension, f
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import DEFAULT_VOICE, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
|
||||
|
||||
summarize = APIRouter(tags=["trusted", "private"])
|
||||
|
||||
@summarize.get("/summarize")
async def summarize_get(text: str = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)):
    """Summarize the submitted text using the given instruction."""
    return await summarize_text(text, instruction)
|
||||
|
||||
@summarize.post("/summarize")
async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)):
    """Summarize either the submitted text or, when it is empty, the uploaded file."""
    if text:
        source_text = text
    else:
        source_text = await extract_text(file)
    return await summarize_text(source_text, instruction)
|
||||
|
||||
@summarize.post("/speaksummary")
async def summarize_tts_endpoint(background_tasks: BackgroundTasks, instruction: str = Form(SUMMARY_INSTRUCT), file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), voice: Optional[str] = Form(DEFAULT_VOICE), speed: Optional[float] = Form(1.2), podcast: Union[bool, str] = Form(False)):
    """Summarize text or an uploaded file and return the summary as spoken audio.

    Returns:
        A ``FileResponse`` for the audio file produced by ``summarize_tts``.
    """
    podcast = str_to_bool(str(podcast))  # Proper boolean conversion
    # extract_text is a coroutine function; without the await a coroutine
    # object, not the extracted text, was passed on for summarization.
    text_content = text if text else await extract_text(file)
    final_output_path = await summarize_tts(text_content, instruction, voice, speed, podcast)
    return FileResponse(path=final_output_path, filename=os.path.basename(final_output_path), media_type='audio/wav')
|
||||
|
||||
|
||||
async def summarize_tts(
    text: str,
    instruction: str = SUMMARY_INSTRUCT,
    voice: Optional[str] = DEFAULT_VOICE,
    speed: float = 1.1,
    podcast: bool = False,
    LLM: AsyncClient = None
):
    """Summarize ``text`` and synthesize the summary to speech.

    The audio title is a timestamp followed by up to five words of an
    LLM-generated, sanitized title.

    Returns:
        The output path reported by ``generate_speech``.
    """
    if not LLM:
        LLM = AsyncClient()

    summarized_text = await summarize_text(text, instruction, LLM=LLM)
    raw_title = await summarize_text(summarized_text, "Provide a title for this summary no longer than 4 words")
    short_title = ' '.join(sanitize_filename(raw_title).split()[:5])
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{timestamp}{short_title}.wav"

    background_tasks = BackgroundTasks()
    final_output_path = await generate_speech(background_tasks, summarized_text, voice, "xtts", speed=speed, podcast=podcast, title=filename)
    DEBUG(f"summary_tts completed with final_output_path: {final_output_path}")
    return final_output_path
|
||||
|
||||
|
||||
|
||||
async def get_title(text: str, LLM: AsyncClient = None):
    """Generate a short, filename-safe title for ``text``.

    Args:
        text: The text to title.
        LLM: Optional client to reuse; a new ``AsyncClient`` is created when omitted.

    Returns:
        The model's response passed through ``sanitize_filename``.
    """
    # The annotation is the class itself; annotating with ``AsyncClient()``
    # would construct a throwaway client every time the module is imported.
    LLM = LLM if LLM else AsyncClient()
    # Single-part request with an explicit 12-token generation budget.
    title = await process_chunk("Generate a title for this text", text, 1, 1, 12, LLM)
    title = sanitize_filename(title)
    return title
|
||||
|
||||
def split_text_into_chunks(text: str) -> List[str]:
    """
    Splits the given text into manageable chunks based on predefined size and overlap.

    Chunk size and overlap are configured in tokens (``SUMMARY_CHUNK_SIZE``,
    ``SUMMARY_CHUNK_OVERLAP``) and converted to word counts by dividing by
    ``SUMMARY_TPW``. Returns a list of space-joined word chunks; an empty or
    whitespace-only text yields an empty list.
    """
    words = text.split()
    adjusted_chunk_size = max(1, int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW))  # Ensure at least 1
    adjusted_overlap = max(0, int(SUMMARY_CHUNK_OVERLAP / SUMMARY_TPW))  # Ensure non-negative
    # If the overlap is >= the chunk size, the stride would be 0 (range() raises
    # ValueError) or negative (no chunks at all); always advance at least one word.
    stride = max(1, adjusted_chunk_size - adjusted_overlap)
    chunks = []
    for i in range(0, len(words), stride):
        DEBUG(f"We are on iteration # {i} if split_text_into_chunks.")
        chunk = ' '.join(words[i:i + adjusted_chunk_size])
        chunks.append(chunk)
    return chunks
|
||||
|
||||
|
||||
def calculate_max_tokens(text: str) -> int:
    """Return a quarter of the estimated token count of ``text``, capped at ``SUMMARY_CHUNK_SIZE``.

    Tokens are estimated as word count times ``SUMMARY_TPW``, with a floor of 1.
    """
    estimated_tokens = int(len(text.split()) * SUMMARY_TPW)
    if estimated_tokens < 1:
        estimated_tokens = 1  # Ensure at least 1
    quarter = estimated_tokens // 4
    return quarter if quarter <= SUMMARY_CHUNK_SIZE else SUMMARY_CHUNK_SIZE
|
||||
|
||||
|
||||
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], background_tasks: BackgroundTasks = None) -> str:
    """Extract plain text from an upload, raw bytes, or a file path.

    Uploads and raw bytes are first spooled to a temporary file. The file
    extension then selects the extractor: PDF, audio/video transcription,
    Markdown, HTML, plain text/CSV/JSON, or DOCX. Unrecognised extensions
    (including raw bytes, which get no suffix) yield an empty string.

    Args:
        file: An ``UploadFile``, a ``bytes``/``bytearray`` payload, or a path.
        background_tasks: When provided, removal of the temporary file is
            scheduled on it instead of being done before returning.

    Returns:
        The extracted text, or ``""`` when no extractor matches.

    Raises:
        ValueError: If ``file`` is none of the supported types.
    """
    temp_file_path = None
    try:
        if isinstance(file, UploadFile):
            # NamedTemporaryFile creates the file atomically; mktemp only
            # returns a name and is open to a race before the file is opened.
            with tempfile.NamedTemporaryFile(mode='wb', suffix=get_extension(file), delete=False) as buffer:
                temp_file_path = buffer.name
                shutil.copyfileobj(file.file, buffer)
            file_path = temp_file_path
        elif isinstance(file, (bytes, bytearray)):
            with tempfile.NamedTemporaryFile(mode='wb', delete=False) as buffer:
                temp_file_path = buffer.name
                buffer.write(file)
            file_path = temp_file_path
        elif isinstance(file, (str, Path)):
            file_path = str(file)
        else:
            raise ValueError("Unsupported file type")

        _, file_ext = os.path.splitext(file_path)
        file_ext = file_ext.lower()
        text_content = ""

        if file_ext == '.pdf':
            text_content = await extract_text_from_pdf(file_path)
        elif file_ext in ['.wav', '.m4a', '.m4v', '.mp3', '.mp4']:
            text_content = await transcribe_audio(file_path=file_path)
        elif file_ext == '.md':
            text_content = await read_text_file(file_path)
            text_content = markdown.markdown(text_content)
        elif file_ext == '.html':
            text_content = await read_text_file(file_path)
            text_content = html2text.html2text(text_content)
        elif file_ext in ['.txt', '.csv', '.json']:
            text_content = await read_text_file(file_path)
        elif file_ext == '.docx':
            text_content = await extract_text_from_docx(file_path)

        return text_content
    finally:
        # Clean up the spooled copy even when an extractor raises; caller-supplied
        # paths are never removed because temp_file_path stays None for them.
        if temp_file_path is not None:
            if background_tasks:
                background_tasks.add_task(os.remove, temp_file_path)
            else:
                os.remove(temp_file_path)
|
||||
|
||||
async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_override: int = None, length_quotient: float = SUMMARY_LENGTH_RATIO, LLM: AsyncClient = None):
    """
    Process the given text: split into chunks, summarize each chunk, and
    potentially summarize the concatenated summary for long texts.

    Args:
        text: The text to summarize.
        instruction: Prompt instruction passed to every ``process_chunk`` call.
        length_override: Total summary budget in tokens; when falsy, the budget
            is the estimated token count divided by ``length_quotient``.
        length_quotient: Divisor applied to the estimated token count.
        LLM: Optional client to reuse; a new ``AsyncClient`` is created when omitted.

    Returns:
        The summary text. With more than one chunk, the per-chunk summaries are
        joined and summarized once more.
    """
    LLM = LLM if LLM else AsyncClient()

    chunked_text = split_text_into_chunks(text)
    total_parts = max(1, len(chunked_text))  # Ensure at least 1

    total_words_count = len(text.split())
    total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW))  # Ensure at least 1
    total_summary_length = length_override if length_override else total_tokens_count // length_quotient
    corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
    # Floor division by a float quotient yields a float; the per-chunk budget is
    # used as a token count, so coerce it to int.
    individual_summary_length = max(1, int(corrected_total_summary_length // total_parts))  # Ensure at least 1

    DEBUG(f"Text split into {total_parts} chunks.")
    summaries = await asyncio.gather(*[
        process_chunk(instruction, chunk, i + 1, total_parts, individual_summary_length, LLM)
        for i, chunk in enumerate(chunked_text)
    ])

    concatenated_summary = ' '.join(summaries)

    if total_parts > 1:
        # Reuse the caller's client for the consolidation pass instead of letting
        # process_chunk silently create a new one.
        concatenated_summary = await process_chunk(instruction, concatenated_summary, 1, 1, LLM=LLM)

    return concatenated_summary
|
||||
|
||||
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, max_tokens: Optional[int] = None, LLM: AsyncClient = None) -> str:
    """
    Process a portion of text using the ollama library asynchronously.

    Builds a prompt from ``instruction`` and ``text`` (labelled "Part X of Y"
    when there is more than one part), calls ``LLM.generate`` with
    ``SUMMARY_MODEL``, and returns the ``'response'`` field of the result.
    When ``max_tokens`` is None, a budget is derived from the text length.
    """

    LLM = LLM if LLM else AsyncClient()

    words_count = max(1, len(text.split()))  # Ensure at least 1
    tokens_count = max(1, int(words_count * SUMMARY_TPW))  # Ensure at least 1
    fraction_tokens = max(1, tokens_count // SUMMARY_LENGTH_RATIO)  # Ensure at least 1
    if max_tokens is None:
        max_tokens = min(fraction_tokens, SUMMARY_CHUNK_SIZE // max(1, total_parts))  # Ensure at least 1
        # NOTE(review): nesting reconstructed from a flattened source — the minimum
        # is assumed to apply only to the derived budget, so explicit small budgets
        # (e.g. 12 for titles) are honoured. Confirm against the original file.
        max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH)  # Ensure a minimum token count to avoid tiny processing chunks

    DEBUG(f"Summarizing part {part} of {total_parts}: Max_tokens: {max_tokens}")

    # Only mention the part number when the text was actually split.
    if part and total_parts > 1:
        prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
    else:
        prompt = f"{instruction}:\n\n{text}"

    DEBUG(f"Starting LLM.generate for part {part} of {total_parts}")
    response = await LLM.generate(
        model=SUMMARY_MODEL,
        prompt=prompt,
        stream=False,
        options={'num_predict': max_tokens, 'temperature': 0.6}
    )

    text_response = response['response']
    DEBUG(f"Completed LLM.generate for part {part} of {total_parts}")

    return text_response
|
||||
|
||||
async def title_and_summary(extracted_text: str):
    """Return ``(title, summary)`` for ``extracted_text``.

    Only the text after the last newline / carriage return of the generated
    title is kept, and it is sanitized again before being returned.
    """
    raw_title = await get_title(extracted_text)
    last_line = raw_title.split("\n")[-1].split("\r")[-1]
    processed_title = sanitize_filename(last_line)
    summary = await summarize_text(extracted_text)
    return processed_title, summary
|
577
sijapi/routers/time.py
Normal file
577
sijapi/routers/time.py
Normal file
|
@ -0,0 +1,577 @@
|
|||
import tempfile
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
import csv
|
||||
import subprocess
|
||||
import asyncio
|
||||
import httpx
|
||||
import io
|
||||
import re
|
||||
import pytz
|
||||
import httpx
|
||||
import sqlite3
|
||||
import math
|
||||
from httpx import Timeout
|
||||
from fastapi import APIRouter, UploadFile, File, Response, Header, Query, Depends, FastAPI, Request, HTTPException, status
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
from datetime import datetime, timedelta
|
||||
from sijapi.utilities import localize_dt
|
||||
from decimal import Decimal, ROUND_UP
|
||||
from typing import Optional, List, Dict, Union, Tuple
|
||||
from collections import defaultdict
|
||||
from dotenv import load_dotenv
|
||||
from traceback import format_exc
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import HOME_DIR, TIMING_API_KEY, TIMING_API_URL
|
||||
|
||||
### INITIALIZATIONS ###
|
||||
# Router under which every /time/* endpoint in this module is registered.
time = APIRouter(tags=["private"])


########################
#### INITIALIZATION ####
########################

# Directory containing this module.
script_directory = os.path.dirname(os.path.abspath(__file__))

# Configuration constants
# Timezone used for all local-date bucketing and formatting below.
pacific = pytz.timezone('America/Los_Angeles')

# Matches a run of characters from the listed code-point ranges followed by one
# space, anchored at the start of the string; used to ignore a leading emoji
# prefix when sorting project names.
# NOTE(review): the final range (U+24C2..U+1F251) is very wide and presumably
# covers far more than emoji — confirm this is intended.
emoji_pattern = re.compile(r'^[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F700-\U0001F77F\U0001F780-\U0001F7FF\U0001F800-\U0001F8FF\U0001F900-\U0001F9FF\U0001FA00-\U0001FA6F\U0001FA70-\U0001FAFF\U00002702-\U000027B0\U000024C2-\U0001F251]+ ')
# httpx timeout configuration (values in seconds).
timeout = Timeout(connect=30, read=600, write=120, pool=5)
|
||||
|
||||
# Define your models
|
||||
class TimingRequest(BaseModel):
    # Request model describing a date range and the desired output format.
    # Both dates must contain a YYYY-MM-DD pattern.
    # NOTE(review): the patterns are not anchored with ^...$ — whether extra
    # leading/trailing characters are rejected depends on the pydantic version.
    start_date: str = Field(..., pattern=r"\d{4}-\d{2}-\d{2}")
    # Optional end of the range.
    end_date: Optional[str] = Field(None, pattern=r"\d{4}-\d{2}-\d{2}")
    # Output format name; defaults to 'json'.
    output_format: Optional[str] = 'json'
|
||||
|
||||
|
||||
####################
|
||||
#### TIMING API ####
|
||||
####################
|
||||
@time.post("/time/post")
async def post_time_entry_to_timing(entry: Dict):
    """Forward ``entry`` as JSON to the Timing time-entries API and return its JSON reply.

    An HTTP error status from the upstream API is re-raised as an
    ``HTTPException`` with the same status code and the upstream body as
    detail; any other failure while sending becomes a generic 500.

    NOTE: a second function with this same name is defined later in this
    module; the route registered here keeps pointing at this handler.
    """
    url = 'https://web.timingapp.com/api/v1/time-entries'
    headers = {
        'Authorization': f'Bearer {TIMING_API_KEY}',
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'X-Time-Zone': 'America/Los_Angeles'
    }
    DEBUG(f"Received entry: {entry}")

    response = None  # Stays None if the request never completes
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(url, headers=headers, json=entry)
            response.raise_for_status()  # This will only raise for 4xx and 5xx responses
    except httpx.HTTPStatusError as exc:
        DEBUG(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
        raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text))
    except Exception as exc:
        DEBUG(f"General exception caught: {exc}")
        raise HTTPException(status_code=500, detail="An unexpected error occurred")

    if not response:
        # Handle the case where the response was not set due to an error.
        raise HTTPException(status_code=500, detail="Failed to make the external API request")
    return response.json()
|
||||
|
||||
def project_sort_key(project):
    """Return ``project`` with any leading match of ``emoji_pattern`` removed, for use as a sort key."""
    stripped_name = re.sub(emoji_pattern, '', project)
    return stripped_name
|
||||
|
||||
|
||||
def prepare_date_range_for_query(start_date, end_date=None):
    """Build the ``(range_start, range_end)`` timestamp strings for a query.

    The range starts at midnight of the day *before* ``start_date`` and ends at
    23:59:59 of ``end_date`` (or of ``start_date`` when no end date is given).
    Both inputs are ``YYYY-MM-DD`` strings.
    """
    previous_day = datetime.strptime(start_date, "%Y-%m-%d") - timedelta(days=1)
    range_start = previous_day.strftime("%Y-%m-%d") + "T00:00:00"
    last_day = end_date or start_date
    return range_start, f"{last_day}T23:59:59"
|
||||
|
||||
|
||||
def truncate_project_title(title):
    """Return the part of ``title`` before the first ``' - '`` separator, or the whole title if there is none."""
    head, _separator, _tail = title.partition(' - ')
    return head
|
||||
|
||||
|
||||
async def fetch_and_prepare_timing_data(start: datetime, end: Optional[datetime] = None) -> List[Dict]:
    """Fetch time entries from the Timing API and split them at Pacific-time midnights.

    The query window opens one day before ``start`` and closes at 23:59:59 of
    ``end`` (or of ``start`` when ``end`` is omitted). Each returned entry is
    converted to Pacific time and cut into per-day segments; segments that
    begin before ``start``'s date are dropped.

    Returns:
        A list of dicts produced by ``create_time_entry``.
    """
    # Widen the window by a day so entries that began the previous day are included.
    window_start = (start - timedelta(days=1)).strftime("%Y-%m-%dT00:00:00")
    last_day = end if end else start
    window_end = f"{datetime.strftime(last_day, '%Y-%m-%d')}T23:59:59"

    # Fetch timing data from the API using TIMING_API_KEY
    url = f"{TIMING_API_URL}/time-entries?start_date_min={window_start}&start_date_max={window_end}&include_project_data=1"
    headers = {
        'Authorization': f'Bearer {TIMING_API_KEY}',
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'X-Time-Zone': 'America/Los_Angeles'
    }

    async with httpx.AsyncClient() as client:
        response = await client.get(url, headers=headers)

    if response.status_code != 200:
        response.raise_for_status()

    stamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    first_day = start.date()
    processed_timing_data = []

    for entry in response.json().get('data', []):
        segment_start = datetime.strptime(entry['start_date'], stamp_format).astimezone(pacific)
        entry_end = datetime.strptime(entry['end_date'], stamp_format).astimezone(pacific)

        # Emit one segment per calendar day the entry spans, each ending at midnight.
        while segment_start.date() < entry_end.date():
            midnight = pacific.localize(datetime.combine(segment_start.date() + timedelta(days=1), datetime.min.time()))
            if segment_start.date() >= first_day:
                seconds_to_midnight = (midnight - segment_start).total_seconds()
                processed_timing_data.append(create_time_entry(entry, segment_start, midnight, seconds_to_midnight))
            segment_start = midnight

        # Final (or only) segment, ending at the entry's real end time.
        if segment_start.date() >= first_day:
            seconds_remaining = (entry_end - segment_start).total_seconds()
            processed_timing_data.append(create_time_entry(entry, segment_start, entry_end, seconds_remaining))

    return processed_timing_data
|
||||
|
||||
|
||||
def format_duration(duration):
    """Convert a duration in seconds to hours, rounded up to one decimal place, as a string."""
    hours = Decimal(duration) / 3600
    return str(hours.quantize(Decimal('0.1'), rounding=ROUND_UP))
|
||||
|
||||
|
||||
def create_time_entry(original_entry, start_time, end_time, duration_seconds):
    """Build the processed entry dict for one (possibly day-split) segment.

    Times are rendered in Pacific time. A missing or empty project falls back
    to the title 'No Project' and the colour '#FFFFFF'.
    """
    local_start = start_time.astimezone(pacific)
    local_end = end_time.astimezone(pacific)

    # An absent/None/empty project behaves like an empty mapping: defaults apply.
    project = original_entry.get('project') or {}
    stamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    day_format = '%Y-%m-%d'

    return {
        'start_time': local_start.strftime(stamp_format),
        'end_time': local_end.strftime(stamp_format),
        'start_date': local_start.strftime(day_format),
        'end_date': local_end.strftime(day_format),
        'duration': format_duration(duration_seconds),
        'notes': original_entry.get('notes', ''),
        'title': original_entry.get('title', 'Untitled'),
        'is_running': original_entry.get('is_running', False),
        'project': {
            'title': project.get('title', 'No Project'),
            'color': project.get('color', '#FFFFFF'),  # Default color
        },
    }
|
||||
|
||||
|
||||
# TIMELINE
|
||||
@time.get("/time/line")
async def get_timing_timeline(
    request: Request,
    start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
    end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
    """Return a Markdown timeline of time entries for the requested date range.

    Args:
        request: The incoming request (unused).
        start_date: First day of the range, ``YYYY-MM-DD``.
        end_date: Optional last day of the range; defaults to ``start_date``.

    Returns:
        A ``text/markdown`` response produced by ``process_timeline``.
    """
    # fetch_and_prepare_timing_data does datetime arithmetic on its arguments,
    # so it must be given datetime objects rather than the raw query strings.
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d") if end_date else None

    # Retain these for filtering the timeline to the requested days
    queried_start_date = start.date()
    queried_end_date = end.date() if end else queried_start_date

    # Fetch and process timing data
    timing_data = await fetch_and_prepare_timing_data(start, end)

    # Process timeline data
    timeline_formatted_data = process_timeline(timing_data, queried_start_date, queried_end_date)

    return Response(content=timeline_formatted_data, media_type="text/markdown")
|
||||
|
||||
|
||||
def process_timeline(timing_data, queried_start_date, queried_end_date):
    """Render processed entries as a Markdown bullet timeline, grouped by day.

    Entries whose Pacific start date is outside the queried range are skipped.
    A ``## <date> <weekday> [total]`` heading is emitted per day only when the
    query spans more than one day.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    entries_by_date = defaultdict(list)

    for entry in timing_data:
        # Parse start/end and express them in Pacific time
        start_datetime = datetime.strptime(entry['start_time'], stamp_format).astimezone(pacific)
        end_datetime = datetime.strptime(entry['end_time'], stamp_format).astimezone(pacific)

        if entry.get('project'):
            project_title = truncate_project_title(entry['project']['title'])
        else:
            project_title = 'No Project'
        task_title = entry.get('title') or 'Untitled'

        day = start_datetime.date()
        if not (queried_start_date <= day <= queried_end_date):
            continue

        duration_hours = format_duration((end_datetime - start_datetime).total_seconds())
        entries_by_date[day].append(
            (start_datetime.strftime('%H:%M:%S'), project_title, task_title, duration_hours)
        )

    multi_day = queried_start_date != queried_end_date
    timeline_output = []

    # Days in chronological order, entries within a day ordered by start time
    for date in sorted(entries_by_date):
        sorted_entries = sorted(entries_by_date[date], key=lambda item: item[0])
        day_total_duration = sum(Decimal(item[3]) for item in sorted_entries)

        if multi_day:
            timeline_output.append(f"## {date.strftime('%Y-%m-%d')} {date.strftime('%A')} [{day_total_duration}]\n")
        timeline_output.extend(
            f" - {start_time} – {project} - {task} [{duration}]"
            for start_time, project, task, duration in sorted_entries
        )

    return "\n".join(timeline_output)
|
||||
|
||||
|
||||
# CSV
|
||||
@time.get("/time/csv")
async def get_timing_csv(
    request: Request,
    start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
    end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
    """Return a pipe-delimited CSV report of time entries for the requested date range.

    Args:
        request: The incoming request (unused).
        start_date: First day of the range, ``YYYY-MM-DD``.
        end_date: Optional last day of the range; defaults to ``start_date``.

    Returns:
        A ``text/csv`` response, or a ``text/plain`` notice when the generated
        CSV is empty.
    """
    # fetch_and_prepare_timing_data does datetime arithmetic on its arguments,
    # so it must be given datetime objects rather than the raw query strings.
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d") if end_date else None

    # Fetch and process timing data
    timing_data = await fetch_and_prepare_timing_data(start, end)

    # Retain these for filtering the CSV rows to the requested days
    queried_start_date = start.date()
    queried_end_date = end.date() if end else queried_start_date

    # Process CSV data
    csv_data = process_csv(timing_data, queried_start_date, queried_end_date)
    if not csv_data or csv_data.strip() == "":
        return Response(content="No CSV data available for the specified date range.", media_type="text/plain")
    return Response(content=csv_data, media_type="text/csv")
|
||||
|
||||
def process_csv(timing_data, queried_start_date, queried_end_date):
    """Render processed entries as pipe-delimited CSV text, one row per project per day.

    Each project row lists its tasks with per-task hours. Every day is followed
    by a 'Day Total' row and an empty separator row. Entries whose Pacific
    start date is outside the queried range are skipped.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    tenth = Decimal('0.1')
    project_task_data = defaultdict(lambda: defaultdict(list))

    for entry in timing_data:
        # Parse start/end and express them in Pacific time
        start_datetime = datetime.strptime(entry['start_time'], stamp_format).astimezone(pacific)
        end_datetime = datetime.strptime(entry['end_time'], stamp_format).astimezone(pacific)

        day = start_datetime.date()
        if not (queried_start_date <= day <= queried_end_date):
            continue

        duration_hours = format_duration((end_datetime - start_datetime).total_seconds())
        if 'title' in entry['project']:
            project_title = truncate_project_title(entry['project']['title'])
        else:
            project_title = 'No Project'
        task_title = entry['title'] if entry.get('title') else 'Untitled'
        project_task_data[day][project_title].append((task_title, duration_hours))

    output = io.StringIO()
    writer = csv.writer(output, delimiter='|', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['Date', 'Project', 'Task', 'Notes', 'Duration'])

    for date, project_tasks in sorted(project_task_data.items()):
        formatted_date = date.strftime('%Y-%m-%d %a')
        day_total_duration = Decimal(0)

        for project, tasks in sorted(project_tasks.items(), key=lambda item: project_sort_key(item[0])):
            # Merge repeated task names into one total per task
            task_summary = defaultdict(Decimal)
            for task, duration in tasks:
                task_summary[task] += Decimal(duration)

            project_duration = sum(task_summary.values()).quantize(tenth)
            day_total_duration += project_duration

            # ';' separates tasks in the cell, so it is replaced inside task names
            task_labels = [
                f"{task.replace(';', ',')} [{total.quantize(tenth)}]"
                for task, total in task_summary.items()
            ]
            writer.writerow([formatted_date, project, "; ".join(task_labels), '', str(project_duration.quantize(tenth))])

        writer.writerow([formatted_date, 'Day Total', '', '', str(day_total_duration.quantize(tenth))])
        writer.writerow(['', '', '', '', ''])

    return output.getvalue()
|
||||
|
||||
# MARKDOWN
|
||||
@time.get("/time/markdown3")
async def get_timing_markdown3(
    request: Request,
    start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
    end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
    """Return a Markdown bullet-list report of time entries for the requested date range."""
    # Fetch and process timing data
    start = localize_dt(start_date)
    end = None
    if end_date:
        end = localize_dt(end_date)
    timing_data = await fetch_and_prepare_timing_data(start, end)

    # Calendar dates used to filter the report; the end defaults to the start
    queried_start_date = start.replace(tzinfo=pacific).date()
    if end:
        queried_end_date = end.replace(tzinfo=pacific).date()
    else:
        queried_end_date = queried_start_date

    # Process Markdown data
    report = process_timing_markdown3(timing_data, queried_start_date, queried_end_date)
    return Response(content=report, media_type="text/markdown")
|
||||
|
||||
def process_timing_markdown3(timing_data, queried_start_date, queried_end_date):
    """Render processed entries as Markdown bullets, one bullet per project per day.

    Entries whose Pacific start date is outside the queried range are skipped.
    A ``## <date> <weekday> [total]`` heading is emitted per day only when the
    query spans more than one day; every day is followed by a blank line.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    project_task_data = defaultdict(lambda: defaultdict(list))

    for entry in timing_data:
        # Parse start/end and express them in Pacific time
        start_datetime = datetime.strptime(entry['start_time'], stamp_format).astimezone(pacific)
        end_datetime = datetime.strptime(entry['end_time'], stamp_format).astimezone(pacific)

        day = start_datetime.date()
        if not (queried_start_date <= day <= queried_end_date):
            continue

        duration_hours = format_duration((end_datetime - start_datetime).total_seconds())
        if 'title' in entry['project']:
            project_title = truncate_project_title(entry['project']['title'])
        else:
            project_title = 'No Project'
        task_title = entry['title'] if entry.get('title') else 'Untitled'
        project_task_data[day][project_title].append((task_title, duration_hours))

    multi_day = queried_start_date != queried_end_date
    markdown_output = []

    for date, projects in sorted(project_task_data.items()):
        day_total_duration = Decimal(0)
        tasks_output = []

        for project, tasks in sorted(projects.items(), key=lambda item: project_sort_key(item[0])):
            # Merge repeated task names into one total per task
            task_summary = defaultdict(Decimal)
            for task, duration in tasks:
                task_summary[task] += Decimal(duration)

            project_duration = sum(task_summary.values()).quantize(Decimal('0.1'))
            day_total_duration += project_duration

            task_labels = [f"{task.replace(';', ',')} [{total}]" for task, total in task_summary.items()]
            tasks_formatted = "; ".join(task_labels)
            tasks_output.append(f"- {project} - {tasks_formatted} - *{project_duration}*.")

        if multi_day:
            markdown_output.append(f"## {date.strftime('%Y-%m-%d %A')} [{day_total_duration}]\n")

        markdown_output.extend(tasks_output)
        markdown_output.append("")

    return "\n".join(markdown_output)
|
||||
|
||||
|
||||
@time.get("/time/markdown")
async def get_timing_markdown(
    request: Request,
    start: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
    end: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
    """Return a Markdown table report of time entries for the requested date range.

    Args:
        request: The incoming request (unused).
        start: First day of the range, ``YYYY-MM-DD``.
        end: Optional last day of the range; the report covers only ``start``
            when omitted.

    Returns:
        A ``text/markdown`` response produced by ``process_timing_markdown``.
    """
    start_date = localize_dt(start)
    # ``end`` is optional: only localize it when it was supplied, matching the
    # /time/markdown3 handler. process_timing_markdown already accepts None.
    end_date = localize_dt(end) if end else None
    markdown_formatted_data = await process_timing_markdown(start_date, end_date)

    return Response(content=markdown_formatted_data, media_type="text/markdown")
|
||||
|
||||
#return JSONResponse(content={"markdown": markdown_formatted_data}, media_type="text/markdown")
|
||||
|
||||
|
||||
async def process_timing_markdown(start_date: datetime, end_date: datetime):  # timing_data, queried_start_date, queried_end_date)
    """Fetch entries for the range and render them as one Markdown table per day.

    Each day's table has a row per project (tasks with hours, project total)
    and a closing TOTAL row. A ``## <date> <weekday> [total]`` heading precedes
    the table only when the range spans more than one day. ``end_date`` may be
    falsy, in which case the range is the single day ``start_date``.
    """
    timing_data = await fetch_and_prepare_timing_data(start_date, end_date)

    queried_start_date = start_date.replace(tzinfo=pacific).date()
    if end_date:
        queried_end_date = end_date.replace(tzinfo=pacific).date()
    else:
        queried_end_date = queried_start_date

    stamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    project_task_data = defaultdict(lambda: defaultdict(list))
    # pacific = pytz.timezone('US/Pacific')

    for entry in timing_data:
        # Parse start/end and express them in Pacific time
        start_datetime = datetime.strptime(entry['start_time'], stamp_format).astimezone(pacific)
        end_datetime = datetime.strptime(entry['end_time'], stamp_format).astimezone(pacific)

        day = start_datetime.date()
        if not (queried_start_date <= day <= queried_end_date):
            continue

        duration_hours = format_duration((end_datetime - start_datetime).total_seconds())
        if 'title' in entry['project']:
            project_title = truncate_project_title(entry['project']['title'])
        else:
            project_title = 'No Project'
        task_title = entry['title'] if entry.get('title') else 'Untitled'
        project_task_data[day][project_title].append((task_title, duration_hours))

    multi_day = queried_start_date != queried_end_date
    tableheader = "|Project|Task(s)|Duration|\n|-------|-------|-------:|"
    markdown_output = []

    for date, projects in sorted(project_task_data.items()):
        day_total_duration = Decimal(0)
        tasks_output = []

        for project, tasks in sorted(projects.items(), key=lambda item: project_sort_key(item[0])):
            # Merge repeated task names into one total per task
            task_summary = defaultdict(Decimal)
            for task, duration in tasks:
                task_summary[task] += Decimal(duration)

            project_duration = sum(task_summary.values()).quantize(Decimal('0.1'))
            day_total_duration += project_duration

            task_labels = [f"{task.replace(';', ',')} [{total}]" for task, total in task_summary.items()]
            tasks_formatted = "; ".join(task_labels)
            tasks_output.append(f"|{project}|{tasks_formatted}|{project_duration}|")

        if multi_day:
            markdown_output.append(f"## {date.strftime('%Y-%m-%d %A')} [{day_total_duration}]\n")

        markdown_output.append(tableheader)
        markdown_output.extend(tasks_output)
        markdown_output.append(f"|TOTAL| |{day_total_duration}|\n")
        markdown_output.append("")

    return "\n".join(markdown_output)
|
||||
|
||||
|
||||
#JSON
|
||||
@time.get("/time/json")
async def get_timing_json(
    request: Request,
    start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
    end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
    """Return time entries for the requested date range as nested JSON.

    Args:
        request: The incoming request (unused).
        start_date: First day of the range, ``YYYY-MM-DD``.
        end_date: Optional last day of the range.

    Returns:
        A ``JSONResponse`` with the structure built by ``process_json``.
    """
    # Fetch and process timing data
    start = localize_dt(start_date)
    # ``end_date`` is optional: only localize it when it was supplied, matching
    # the /time/markdown3 handler; the fetch helper accepts None for the end.
    end = localize_dt(end_date) if end_date else None
    timing_data = await fetch_and_prepare_timing_data(start, end)

    # Convert processed data to the required JSON structure
    json_data = process_json(timing_data)
    return JSONResponse(content=json_data)
|
||||
|
||||
def process_json(timing_data):
    """Group entries as ``{start_date: {project title: {task title: [entries]}}}``.

    Entries whose project mapping has no 'title' key are filed under 'No Project'.
    """
    def _new_project_bucket():
        return defaultdict(list)

    def _new_day_bucket():
        return defaultdict(_new_project_bucket)

    grouped = defaultdict(_new_day_bucket)

    for entry in timing_data:
        day_key = entry['start_date']  # Already in 'YYYY-MM-DD' format
        project = entry['project']
        project_title = project['title'] if 'title' in project else 'No Project'
        grouped[day_key][project_title][entry['title']].append(entry)

    return dict(grouped)
|
||||
|
||||
|
||||
# ROCKETMATTER CSV PARSING
|
||||
|
||||
def load_project_names(filename):
    """Read the UTF-8 JSON file at ``filename`` and return the parsed content."""
    with open(filename, 'r', encoding='utf-8') as handle:
        raw_json = handle.read()
    return json.loads(raw_json)
|
||||
|
||||
|
||||
def parse_input(fields, project_name_mappings, start_times_by_date):
    """Turn one Rocketmatter CSV row into sequential time-entry dicts.

    Reads the project code (``fields[3]``), task description (``fields[4]``),
    billing date as MM/DD/YYYY (``fields[6]``) and total hours (``fields[9]``).
    Tasks written as ``name [1.5]`` or ``name (1.5)`` become individual entries;
    hours not covered by explicit durations go to one extra task. Entries are
    laid out back to back, starting at 8 AM Pacific for the first row of a
    billing date and continuing from the previous row's end afterwards.

    Args:
        fields: The row's columns (a sequence of strings).
        project_name_mappings: Maps project codes to names; unknown codes are
            used as the name.
        start_times_by_date: Maps billing dates to the next free start time;
            updated in place.

    Returns:
        A list of dicts with keys "project", "Task", "Start_time", "End_time".
    """
    project_code = fields[3].strip()
    project_name = project_name_mappings.get(project_code, project_code)
    task_descriptions = fields[4].strip()
    billing_date_str = fields[6].strip()
    total_hours = float(fields[9].strip())

    billing_date = datetime.strptime(billing_date_str, "%m/%d/%Y").date()

    # If no start time is recorded for this billing_date, default to 8 AM
    if billing_date not in start_times_by_date:
        start_time = pacific.localize(datetime.combine(billing_date, datetime.min.time()).replace(hour=8))
    else:
        start_time = start_times_by_date[billing_date]

    # Normalize the task descriptions by converting line breaks and variations of task separators (],), (),)\s to standard form [,]
    task_descriptions = re.sub(r'(\)|\])(\s+|$)(?=\[|\(|[A-Za-z])', '],', task_descriptions)
    task_descriptions = re.sub(r'(\r?\n|\r)', ',', task_descriptions)

    # Regex pattern to match task descriptions along with their respective durations.
    task_pattern = re.compile(r'(.*?)[\[\(](\d+\.\d+)[\]\)]\s*,?')
    tasks_with_durations = task_pattern.findall(task_descriptions)

    tasks = []
    if not tasks_with_durations:
        # No explicit durations: the whole description is a single task. This
        # case is handled on its own so the row is not ALSO added as a
        # "remainder" task, which would count its hours twice.
        task_name = task_descriptions if task_descriptions else "Undefined Task"
        tasks.append((task_name, total_hours))
    else:
        total_calc_hours = 0

        # Process tasks with explicit durations
        for raw_name, raw_hours in tasks_with_durations:
            task_name = raw_name.strip(' ,;')
            duration_hours = float(raw_hours)
            tasks.append((task_name if task_name else "Undefined Task", duration_hours))
            total_calc_hours += duration_hours

        # If there are hours not accounted for, consider them for a task without a specific duration
        remainder = total_hours - total_calc_hours
        if remainder > 0:
            # Whatever text is left once the duration-tagged tasks are removed
            non_duration_task = task_pattern.sub('', task_descriptions).strip(' ,;')
            if not non_duration_task:
                non_duration_task = "Undefined Task"
            tasks.append((non_duration_task, remainder))

    json_entries = []
    for task_name, duration_hours in tasks:
        end_time = start_time + timedelta(hours=duration_hours)
        # NOTE(review): the UTC offset is hard-coded as -07:00 regardless of the
        # date; presumably this is off by an hour outside daylight time — confirm.
        json_entries.append({
            "project": project_name,
            "Task": task_name,
            "Start_time": start_time.strftime("%Y-%m-%d %H:%M:%S-07:00"),
            "End_time": end_time.strftime("%Y-%m-%d %H:%M:%S-07:00")
        })
        start_time = end_time

    # Update the start time for the billing_date in the dictionary
    start_times_by_date[billing_date] = start_time

    return json_entries
|
||||
|
||||
|
||||
async def post_time_entry_to_timing(entry):
    """Post one parsed Rocketmatter entry to the Timing time-entries API.

    NOTE: this definition reuses the name of the ``/time/post`` route handler
    defined earlier in this module and rebinds the module-level name.

    Args:
        entry: A dict with keys "Start_time", "End_time", "project" and "Task"
            (the shape produced by ``parse_input``).

    Returns:
        A ``(status_code, response_json)`` tuple.
    """
    url = f"{TIMING_API_URL}/time-entries"  # The URL for posting time entries
    headers = {
        "Authorization": f"Bearer {TIMING_API_KEY}",
        "Content-Type": "application/json",
        "Accept": "application/json",
        'X-Time-Zone': 'America/Los_Angeles'  # Set the timezone for the API request
    }
    data = {
        "start_date": entry["Start_time"],  # Format these according to the API's requirements
        "end_date": entry["End_time"],
        "project": entry["project"],
        "title": entry["Task"],
        "notes": "Automatically generated based on Rocketmatter reports.",
        "replace_existing": False
    }
    # httpx.post is synchronous and returns a Response, which cannot be awaited;
    # an AsyncClient is required to make the request from a coroutine.
    async with httpx.AsyncClient() as client:
        response = await client.post(url, headers=headers, json=data)
    return response.status_code, response.json()
|
||||
|
||||
|
||||
@time.get("/time/flagemoji/{country_code}")
def flag_emoji(country_code: str):
    """Return the emoji built by shifting each upper-cased character of
    *country_code* by a fixed code-point offset (127397)."""
    shift = 127397
    symbols = [chr(ord(letter) + shift) for letter in country_code.upper()]
    return {"emoji": ''.join(symbols)}
|
||||
|
||||
|
||||
@time.head("/time/")
async def read_root():
    """Answer HEAD requests on /time/ with an empty JSON object."""
    return {}
|
||||
|
||||
|
||||
@time.get("/time/")
async def root():
    """Answer GET requests on /time/ with a fixed placeholder message."""
    return {"message": "Ring, ring, ring, ring, ring, ring, ring. \n\n. Banana phone."}
|
||||
|
406
sijapi/routers/tts.py
Normal file
406
sijapi/routers/tts.py
Normal file
|
@ -0,0 +1,406 @@
|
|||
from fastapi import APIRouter, UploadFile, HTTPException, Response, Form, File, BackgroundTasks, Depends, Request
|
||||
from fastapi.responses import Response, StreamingResponse, FileResponse
|
||||
from fastapi.responses import StreamingResponse, PlainTextResponse
|
||||
import requests
|
||||
import json
|
||||
import shutil
|
||||
from io import BytesIO
|
||||
import asyncio
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Union, List
|
||||
from pydub import AudioSegment
|
||||
from TTS.api import TTS
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from time import time
|
||||
import torch
|
||||
import traceback
|
||||
import hashlib
|
||||
import uuid
|
||||
import httpx
|
||||
import tempfile
|
||||
import random
|
||||
import re
|
||||
import os
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import HOME_DIR, DATA_DIR, DEFAULT_VOICE, TTS_DIR, TTS_SEGMENTS_DIR, VOICE_DIR, PODCAST_DIR, TTS_OUTPUT_DIR, ELEVENLABS_API_KEY
|
||||
from sijapi.utilities import sanitize_filename
|
||||
|
||||
|
||||
### INITIALIZATIONS ###
|
||||
# Router that every endpoint in this module is registered on.
tts = APIRouter(tags=["trusted", "private"])

# Local XTTS inference is pinned to the CPU.
DEVICE = torch.device('cpu')

# Coqui TTS model identifier loaded by the local synthesis helpers.
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
|
||||
|
||||
|
||||
@tts.get("/tts/local_voices", response_model=List[str])
async def list_wav_files():
    """List the local voice names available in VOICE_DIR.

    Returns:
        The file name without its ``.wav`` suffix for every ``*.wav`` entry
        in VOICE_DIR.
    """
    # Path.stem drops only the final suffix, so a voice stored as
    # "first.last.wav" is reported as "first.last" -- the name select_voice()
    # needs to rebuild the file path -- rather than being cut at the first dot.
    return [Path(file).stem for file in os.listdir(VOICE_DIR) if file.endswith(".wav")]
|
||||
|
||||
@tts.get("/tts/elevenlabs_voices")
async def list_11l_voices():
    """Return the ElevenLabs voices as plain text, one ``name: `voice_id``` line each.

    Any failure while querying or parsing is logged and whatever was
    collected so far (possibly nothing) is returned with status 200.
    """
    formatted_list = ""
    endpoint = "https://api.elevenlabs.io/v1/voices"
    auth_headers = {"xi-api-key": ELEVENLABS_API_KEY}

    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(endpoint, headers=auth_headers)
            DEBUG(f"Response: {response}")
            if response.status_code == 200:
                formatted_list = ""
                for entry in response.json().get("voices", []):
                    voice_name = entry["name"]
                    voice_id = entry["voice_id"]
                    formatted_list += f"{voice_name}: `{voice_id}`\n"
        except Exception as e:
            ERR(f"Error determining voice ID: {str(e)}")

    return PlainTextResponse(formatted_list, status_code=200)
|
||||
|
||||
|
||||
|
||||
|
||||
def select_voice(voice_name: str) -> str:
    """Resolve a local voice name to the path of its WAV file in VOICE_DIR.

    Raises:
        HTTPException: 404 when the file does not exist or any other error
            occurs while resolving it.
    """
    try:
        candidate = VOICE_DIR / f"{voice_name}.wav"
        DEBUG(f"select_voice received query to use voice: {voice_name}. Looking for {candidate} inside {VOICE_DIR}.")

        if not candidate.is_file():
            raise HTTPException(status_code=404, detail="Voice file not found")
        return str(candidate)
    except Exception as e:
        # Also catches the HTTPException raised just above; it is logged and
        # re-issued as the same 404.
        ERR(f"Voice file not found: {str(e)}")
        ERR(traceback.format_exc())
        raise HTTPException(status_code=404, detail="Voice file not found")
|
||||
|
||||
|
||||
|
||||
@tts.post("/tts/speak")
@tts.post("/v1/audio/speech")
async def generate_speech_endpoint(
    request: Request,
    background_tasks: BackgroundTasks,
    model: str = Form("eleven_turbo_v2"),
    text: Optional[str] = Form(None),
    file: Optional[UploadFile] = File(None),
    voice: Optional[str] = Form(None),
    voice_file: Optional[UploadFile] = File(None),
    speed: Optional[float] = Form(1.1),
    podcast: Union[bool, str] = Form(False),
    stream: bool = Form(True)
):
    """Text-to-speech endpoint.

    The text comes from ``file`` when one is uploaded, otherwise from ``text``.
    With ``stream`` set, audio is streamed back (ElevenLabs for the
    "eleven_turbo_v2" model, local synthesis otherwise); without it the
    request is delegated to generate_speech() and its result is returned.

    Raises:
        HTTPException: deliberate client errors raised by the helpers
            (e.g. 400 "No text provided", 404 "Voice file not found") are
            passed through unchanged; any other failure becomes a 500.
    """
    try:
        # Form data may deliver the flag as the string "true"/"false".
        podcast = podcast if isinstance(podcast, bool) else podcast.lower() == 'true'
        text_content = await get_text_content(text, file)
        if stream:
            model = model if model else await get_model(voice, voice_file)
            if model == "eleven_turbo_v2":
                voice_id = await determine_voice_id(voice)
                audio_stream = await get_audio_stream(model, text_content, voice_id)
                return StreamingResponse(audio_stream, media_type="audio/mpeg")
            else:
                return await stream_tts(text_content, speed, voice, voice_file)
        else:
            return await generate_speech(background_tasks, text_content, voice, voice_file, model, speed, podcast)
    except HTTPException:
        # Keep the status and detail chosen by the helper instead of masking
        # them behind a generic error.
        raise
    except Exception as e:
        ERR(f"Error in TTS: {str(e)}")
        ERR(traceback.format_exc())
        # 666 is not a registered HTTP status code; report a server error.
        raise HTTPException(status_code=500, detail="error in TTS")
|
||||
|
||||
|
||||
async def generate_speech(
    background_tasks: BackgroundTasks,
    text: str,
    voice: str = None,
    voice_file: UploadFile = None,
    model: str = None,
    speed: float = 1.1,
    podcast: bool = False,
    title: str = None,
    output_dir = None
) -> str:
    """Synthesize *text* to an audio file and return the file's path as a string.

    "eleven_turbo_v2" is rendered through ElevenLabs, "xtts" through the local
    model; when no model is given it is chosen by get_model(). For "xtts" the
    produced file is scheduled for removal through ``background_tasks``.

    Raises:
        HTTPException: 400 when the model is neither of the two above.
        Exception: anything raised by the helpers is logged and re-raised.
    """
    output_dir = Path(output_dir) if output_dir else TTS_OUTPUT_DIR
    if not output_dir.exists():
        output_dir.mkdir(parents=True)

    try:
        model = model or await get_model(voice, voice_file)

        if model == "eleven_turbo_v2":
            INFO(f"Using ElevenLabs.")
            audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir)
            return str(audio_file_path)

        if model == "xtts":
            INFO(f"Using XTTS2")
            # NOTE(review): output_dir lands in local_tts's ``output_path``
            # parameter -- confirm a directory is what local_tts expects.
            final_output_dir = await local_tts(text, speed, voice, voice_file, podcast, background_tasks, title, output_dir)
            background_tasks.add_task(os.remove, str(final_output_dir))
            return str(final_output_dir)

        raise HTTPException(status_code=400, detail="Invalid model specified")
    except HTTPException as e:
        ERR(f"HTTP error: {e}")
        ERR(traceback.format_exc())
        raise e
    except Exception as e:
        ERR(f"Error: {e}")
        ERR(traceback.format_exc())
        raise e
|
||||
|
||||
|
||||
|
||||
async def get_model(voice: str = None, voice_file: UploadFile = None):
    """Pick the TTS backend for a request.

    Returns:
        "xtts" when a voice sample is uploaded or *voice* names a local WAV
        file, otherwise "eleven_turbo_v2" when *voice* resolves to an
        ElevenLabs voice id.

    Raises:
        HTTPException: 400 when neither a voice nor a voice file is given.
    """
    if voice_file:
        return "xtts"

    if voice:
        try:
            if select_voice(voice):
                return "xtts"
        except HTTPException:
            # select_voice() signals "no such local voice" by raising a 404
            # rather than returning a falsy value; treat that as a miss so
            # the ElevenLabs lookup below is still reached.
            pass

        if await determine_voice_id(voice):
            return "eleven_turbo_v2"

    raise HTTPException(status_code=400, detail="No model or voice specified")
|
||||
|
||||
async def determine_voice_id(voice_name: str) -> str:
    """Map a voice name (or id) to an ElevenLabs voice id.

    The built-in table is consulted first, then the ElevenLabs voice list is
    searched for an entry whose id or name equals *voice_name*. If nothing
    matches, or the lookup fails, a fixed fallback id is returned.
    """
    hardcoded_voices = {
        "alloy": "E3A1KVbKoWSIKSZwSUsW",
        "echo": "b42GBisbu9r5m5n6pHF7",
        "fable": "KAX2Y6tTs0oDWq7zZXW7",
        "onyx": "clQb8NxY08xZ6mX6wCPE",
        "nova": "6TayTBKLMOsghG7jYuMX",
        "shimmer": "E7soeOyjpmuZFurvoxZ2",
        DEFAULT_VOICE: "6TayTBKLMOsghG7jYuMX",
        "Sangye": "E7soeOyjpmuZFurvoxZ2",
        "Herzog": "KAX2Y6tTs0oDWq7zZXW7",
        "Attenborough": "b42GBisbu9r5m5n6pHF7"
    }

    known_id = hardcoded_voices.get(voice_name)
    if known_id is not None:
        DEBUG(f"Found voice ID - {known_id}")
        return known_id

    DEBUG(f"Requested voice not among the hardcoded options.. checking with 11L next.")
    endpoint = "https://api.elevenlabs.io/v1/voices"
    auth_headers = {"xi-api-key": ELEVENLABS_API_KEY}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(endpoint, headers=auth_headers)
            DEBUG(f"Response: {response}")
            if response.status_code == 200:
                for candidate in response.json().get("voices", []):
                    if voice_name == candidate["voice_id"] or voice_name == candidate["name"]:
                        return candidate["voice_id"]
        except Exception as e:
            ERR(f"Error determining voice ID: {str(e)}")

    # Fallback when the voice could not be resolved.
    return "6TayTBKLMOsghG7jYuMX"
|
||||
|
||||
|
||||
async def elevenlabs_tts(model: str, input_text: str, voice: str, title: str = None, output_dir: str = None):
    """Render *input_text* with ElevenLabs and save the MP3 response.

    The file is named after the sanitized *title* (or a timestamp when no
    title is given) and written to *output_dir*, defaulting to TTS_OUTPUT_DIR.

    Returns:
        Path of the written MP3 file.

    Raises:
        HTTPException: with the upstream status code when ElevenLabs does not
            answer 200.
    """
    voice_id = await determine_voice_id(voice)

    endpoint = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    request_headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
    body = {
        "text": input_text,
        "model_id": model
    }
    async with httpx.AsyncClient() as client:
        response = await client.post(endpoint, json=body, headers=request_headers)

    target_dir = output_dir if output_dir else TTS_OUTPUT_DIR
    stem = title if title else datetime.now().strftime("%Y%m%d%H%M%S")
    file_path = Path(target_dir) / f"{sanitize_filename(stem)}.mp3"

    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")

    with open(file_path, "wb") as audio_file:
        audio_file.write(response.content)
    return file_path
|
||||
|
||||
|
||||
|
||||
|
||||
async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> str:
    """Return the stripped text to synthesize; an uploaded file wins over *text*.

    Raises:
        HTTPException: 400 when neither a file nor non-empty text is supplied.
    """
    if file:
        raw = await file.read()
        return raw.decode("utf-8").strip()
    if text:
        return text.strip()
    raise HTTPException(status_code=400, detail="No text provided")
|
||||
|
||||
|
||||
|
||||
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
    """Resolve the speaker WAV to clone from.

    Order of precedence: a named local voice, then an uploaded sample (which
    is stored in VOICE_DIR), then DEFAULT_VOICE.

    An upload whose content matches the existing file of the same name is not
    stored again; an upload that clashes with a different file is saved under
    ``<stem>NN.wav`` using the first free counter.

    Returns:
        Path of the WAV file as a string.
    """
    if voice:
        return select_voice(voice)

    elif voice_file and isinstance(voice_file, UploadFile):
        VOICE_DIR.mkdir(exist_ok=True)

        content = await voice_file.read()
        checksum = hashlib.md5(content).hexdigest()

        # The upload's filename is client-controlled: keep only its final
        # component so it cannot point outside VOICE_DIR, and fall back to a
        # fixed name when the client sent none.
        safe_name = Path(voice_file.filename or "").name or "voice.wav"
        existing_file = VOICE_DIR / safe_name
        if existing_file.is_file():
            with open(existing_file, 'rb') as f:
                existing_checksum = hashlib.md5(f.read()).hexdigest()

            if checksum == existing_checksum:
                return str(existing_file)

        base_name = existing_file.stem
        counter = 1
        new_file = existing_file
        while new_file.is_file():
            new_file = VOICE_DIR / f"{base_name}{counter:02}.wav"
            counter += 1

        with open(new_file, 'wb') as f:
            f.write(content)
        return str(new_file)

    else:
        DEBUG(f"{datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}")
        return select_voice(DEFAULT_VOICE)
|
||||
|
||||
|
||||
async def local_tts(text_content: str, speed: float, voice: str, voice_file = None, podcast: bool = False, background_tasks: BackgroundTasks = None, title: str = None, output_path: Optional[Path] = None) -> str:
    """Synthesize *text_content* with the local XTTS model into one WAV file.

    The text is split into segments, each segment is rendered to a temporary
    file in TTS_SEGMENTS_DIR, and the segments are concatenated.

    Args:
        output_path: Either the target file, or an existing directory in
            which a ``<timestamp>_<title>.wav`` file is created. When omitted
            the file is created in TTS_OUTPUT_DIR.
        podcast: When true, a copy is also exported into PODCAST_DIR.

    Returns:
        Path of the combined WAV file as a string.
    """
    file_path = Path(output_path) if output_path else TTS_OUTPUT_DIR
    # generate_speech() hands over its output *directory* here; exporting to
    # a directory fails, so a directory gets a generated file name just like
    # the no-path case.
    if not output_path or file_path.is_dir():
        datetime_str = datetime.now().strftime("%Y%m%d%H%M%S")
        title = sanitize_filename(title) if title else "Audio"
        file_path = file_path / f"{datetime_str}_{title}.wav"

    # Ensure the parent directory exists
    file_path.parent.mkdir(parents=True, exist_ok=True)

    voice_file_path = await get_voice_file_path(voice, voice_file)
    XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
    segments = split_text(text_content)
    combined_audio = AudioSegment.silent(duration=0)

    for i, segment in enumerate(segments):
        segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
        DEBUG(f"Segment file path: {segment_file_path}")
        # Synthesis is blocking, so it runs in a worker thread.
        segment_file = await asyncio.to_thread(XTTS.tts_to_file, text=segment, speed=speed, file_path=str(segment_file_path), speaker_wav=[voice_file_path], language="en")
        DEBUG(f"Segment file generated: {segment_file}")
        combined_audio += AudioSegment.from_wav(str(segment_file))
        # Delete the segment file immediately after adding it to the combined audio
        segment_file_path.unlink()

    if podcast:
        podcast_file_path = PODCAST_DIR / file_path.name
        combined_audio.export(podcast_file_path, format="wav")

    combined_audio.export(file_path, format="wav")
    return str(file_path)
|
||||
|
||||
|
||||
async def stream_tts(text_content: str, speed: float, voice: str, voice_file) -> StreamingResponse:
    """Stream locally synthesized speech, one text segment at a time.

    Each segment is rendered to a file by generate_tts(), sent in 1024-byte
    chunks, and the file is removed once it has been fully sent.
    """
    speaker_wav = await get_voice_file_path(voice, voice_file)
    pieces = split_text(text_content)

    async def audio_stream_generator():
        for piece in pieces:
            rendered_path = await generate_tts(piece, speed, speaker_wav)
            with open(rendered_path, 'rb') as audio:
                while chunk := audio.read(1024):
                    yield chunk
            os.remove(rendered_path)

    return StreamingResponse(audio_stream_generator(), media_type='audio/wav')
|
||||
|
||||
|
||||
|
||||
async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
    """Render *text* with the local XTTS model into a temporary WAV file.

    Returns:
        Path of the generated file; the caller is responsible for deleting it.
    """
    # mkstemp() creates the file atomically; mktemp() only invents a name and
    # leaves a window in which another process can claim it.
    fd, output_path = tempfile.mkstemp(suffix=".wav", dir=tempfile.gettempdir())
    os.close(fd)

    try:
        XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
        # Synthesis is blocking; run it in a worker thread so the event loop
        # keeps serving other requests.
        await asyncio.to_thread(XTTS.tts_to_file, text=text, speed=speed, file_path=output_path, speaker_wav=[voice_file_path], language="en")
    except Exception:
        # Do not leave the placeholder file behind when synthesis fails.
        if os.path.exists(output_path):
            os.remove(output_path)
        raise

    return output_path
|
||||
|
||||
|
||||
async def get_audio_stream(model: str, input_text: str, voice: str):
    """Fetch ElevenLabs speech for *input_text* and return it as 1024-byte chunks.

    Args:
        model: Accepted for interface compatibility; the request always uses
            "eleven_turbo_v2".
        voice: Voice name or id, resolved through determine_voice_id().

    Returns:
        An iterator over the response body.

    Raises:
        HTTPException: with the upstream status code when ElevenLabs does not
            answer 200.
    """
    voice_id = await determine_voice_id(voice)
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    payload = {
        "text": input_text,
        "model_id": "eleven_turbo_v2"
    }
    headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
    # requests is a blocking client; issuing the call from a worker thread
    # keeps the event loop free while the audio is downloaded.
    response = await asyncio.to_thread(requests.post, url, json=payload, headers=headers)

    if response.status_code == 200:
        return response.iter_content(1024)
    else:
        raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
|
||||
|
||||
|
||||
|
||||
|
||||
def split_text(text, target_length=35, max_length=50):
    """Split *text* into segments of whole sentences for synthesis.

    The text is cleaned with clean_text_for_tts(), split after sentence-ending
    punctuation, and sentences are packed into a segment until adding the
    next one would exceed *max_length* words. A single sentence longer than
    *max_length* becomes a segment of its own.

    Args:
        text: Raw input text; ``None`` or empty text yields no segments.
        target_length: Currently unused; kept for interface compatibility.
        max_length: Maximum number of words per packed segment.

    Returns:
        List of non-empty segment strings.
    """
    text = clean_text_for_tts(text)
    if not text:
        return []

    sentences = re.split(r'(?<=[.!?"])\s+', text)
    segments = []
    current_words = []

    for sentence in sentences:
        sentence_words = sentence.split()

        # Flush only when there is something to flush: an over-long first
        # sentence must not emit an empty segment ahead of itself.
        if current_words and len(current_words) + len(sentence_words) > max_length:
            segment = ' '.join(current_words)
            segments.append(segment)
            DEBUG(f"split_text - segment: {segment}, word count: {len(current_words)}")
            current_words = []

        current_words.extend(sentence_words)

    if current_words:
        segment = ' '.join(current_words)
        segments.append(segment)
        DEBUG(f"split_text - segment: {segment}, word count: {len(current_words)}")

    return segments
|
||||
|
||||
|
||||
def clean_text_for_tts(text: str) -> str:
    """Normalize *text* for speech synthesis.

    Line breaks become spaces, every character other than word characters,
    whitespace and ``. , ; : ! ? ' "`` is dropped, and runs of whitespace are
    collapsed to one space. ``None`` is logged and yields ``None``.
    """
    if text is None:
        DEBUG(f"No text received.")
        return None

    flattened = text.replace("\n", " ").replace("\r", " ")
    filtered = re.sub(r"[^\w\s.,;:!?'\"]", '', flattened)
    return re.sub(r'\s+', ' ', filtered).strip()
|
||||
|
||||
|
||||
|
||||
def copy_to_podcast_dir(file_path):
    """Copy *file_path* into PODCAST_DIR under the same file name.

    Best effort: every failure is reported on stdout and swallowed.
    """
    try:
        # Destination keeps the source's file name inside PODCAST_DIR.
        destination_path = PODCAST_DIR / Path(file_path).name
        shutil.copy(file_path, destination_path)
        print(f"File copied successfully to {destination_path}")
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except shutil.SameFileError:
        print(f"Source and destination are the same file: {file_path}")
    except PermissionError:
        print(f"Permission denied while copying the file: {file_path}")
    except Exception as e:
        print(f"An error occurred while copying the file: {file_path}")
        print(f"Error details: {str(e)}")
|
265
sijapi/routers/weather.py
Normal file
265
sijapi/routers/weather.py
Normal file
|
@ -0,0 +1,265 @@
|
|||
import asyncio
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi import HTTPException
|
||||
from asyncpg.cursor import Cursor
|
||||
from httpx import AsyncClient
|
||||
from typing import Dict
|
||||
from datetime import datetime
|
||||
from shapely.wkb import loads
|
||||
from binascii import unhexlify
|
||||
from sijapi.utilities import localize_dt
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import VISUALCROSSING_API_KEY, TZ
|
||||
from sijapi.utilities import get_db_connection, haversine
|
||||
from sijapi.routers import locate
|
||||
|
||||
# Router object for the weather module.
weather = APIRouter()
|
||||
|
||||
|
||||
async def get_weather(date_time: datetime, latitude: float, longitude: float):
    """Return weather data for a date and location, using the database as a cache.

    The stored record is reused only when all of the following hold: the
    requested time is not in the future, the record was updated after the
    requested time, the stored location is within 8 haversine units of the
    request (unit is whatever ``haversine`` returns -- TODO confirm), and
    hourly rows exist. Otherwise the day is fetched from Visual Crossing,
    stored, and read back from the database.

    Returns:
        The dict produced by get_weather_from_db(), or whatever was read from
        the database initially (possibly ``None``) when fetching or storing
        new data fails.
    """
    # request_date_str = date_time.strftime("%Y-%m-%d")
    DEBUG(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}")
    daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
    fetch_new_data = True
    if daily_weather_data:
        try:
            DEBUG(f"Daily weather data from db: {daily_weather_data}")
            last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
            last_updated = localize_dt(last_updated)
            # The stored location is hex-encoded WKB; decode it to read back
            # the coordinates.
            stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
            stored_loc = loads(stored_loc_data)
            stored_lat = stored_loc.y
            stored_lon = stored_loc.x
            stored_ele = stored_loc.z


            hourly_weather = daily_weather_data.get('HourlyWeather')

            DEBUG(f"Hourly: {hourly_weather}")

            DEBUG(f"\nDEBUG:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n")

            request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
            DEBUG(f"\nDEBUG:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")

            if last_updated and (date_time <= datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
                DEBUG(f"We can use existing data... :')")
                fetch_new_data = False

        except Exception as e:
            # Any problem inspecting the cached record leaves fetch_new_data
            # True, so fresh data is requested below.
            ERR(f"Error in get_weather: {e}")

    if fetch_new_data:
        DEBUG(f"We require new data!")
        request_date_str = date_time.strftime("%Y-%m-%d")
        url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
        try:
            async with AsyncClient() as client:
                response = await client.get(url)
                if response.status_code == 200:
                    DEBUG(f"Successfully obtained data from VC...")
                    try:
                        weather_data = response.json()
                        store_result = await store_weather_to_db(date_time, weather_data)
                        if store_result == "SUCCESS":
                            DEBUG(f"New weather data for {request_date_str} stored in database...")
                        else:
                            ERR(f"Failed to store weather data for {request_date_str} in database! {store_result}")

                        DEBUG(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}")
                        daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
                        if daily_weather_data is not None:
                            return daily_weather_data
                        else:
                            raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
                    except Exception as e:
                        ERR(f"Problem parsing VC response or storing data: {e}")
                        raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
                else:
                    ERR(f"Failed to fetch weather data: {response.status_code}, {response.text}")
        except Exception as e:
            # This handler also catches the HTTPExceptions raised above, so
            # they are logged here and do not reach the caller.
            ERR(f"Exception during API call: {e}")

    return daily_weather_data
|
||||
|
||||
|
||||
async def store_weather_to_db(date_time: datetime, weather_data: dict):
    """Insert one day of Visual Crossing data (daily row plus hourly rows).

    Args:
        date_time: Requested date; its calendar date is combined with each
            hour's time string to build the hourly timestamps.
        weather_data: Parsed API response; ``weather_data['days'][0]`` is the
            day that gets stored.

    Returns:
        "SUCCESS" once the daily row has been inserted (failures on single
        hourly rows are logged and skipped), ``None`` when preparing or
        inserting the daily row fails.
    """
    conn = await get_db_connection()

    try:
        day_data = weather_data.get('days')[0]
        DEBUG(f"day_data.get('sunrise'): {day_data.get('sunrise')}")

        # Handle preciptype and stations as PostgreSQL arrays
        preciptype_array = day_data.get('preciptype', []) or []
        stations_array = day_data.get('stations', []) or []

        date_str = date_time.strftime("%Y-%m-%d")

        # Get location details from weather data if available
        longitude = weather_data.get('longitude')
        latitude = weather_data.get('latitude')
        elevation = locate.get_elevation(latitude, longitude)
        # Compare against None rather than truthiness: 0 is a legitimate
        # coordinate and a legitimate (sea-level) elevation.
        has_location = None not in (longitude, latitude, elevation)
        location_point = f"POINTZ({longitude} {latitude} {elevation})" if has_location else None

        # Turn the day string and the "HH:MM[:SS]" sunrise/sunset strings
        # into datetime objects.
        day_data['datetime'] = localize_dt(day_data.get('datetime'))
        sunrise_parts = day_data.get('sunrise').split(':')
        sunset_parts = day_data.get('sunset').split(':')
        day_data['sunrise'] = day_data['datetime'].replace(hour=int(sunrise_parts[0]), minute=int(sunrise_parts[1]))
        day_data['sunset'] = day_data['datetime'].replace(hour=int(sunset_parts[0]), minute=int(sunset_parts[1]))

        daily_weather_params = (
            day_data.get('sunrise'), day_data.get('sunriseEpoch'),
            day_data.get('sunset'), day_data.get('sunsetEpoch'),
            day_data.get('description'), day_data.get('tempmax'),
            day_data.get('tempmin'), day_data.get('uvindex'),
            day_data.get('winddir'), day_data.get('windspeed'),
            day_data.get('icon'), datetime.now(),
            day_data.get('datetime'), day_data.get('datetimeEpoch'),
            day_data.get('temp'), day_data.get('feelslikemax'),
            day_data.get('feelslikemin'), day_data.get('feelslike'),
            day_data.get('dew'), day_data.get('humidity'),
            day_data.get('precip'), day_data.get('precipprob'),
            day_data.get('precipcover'), preciptype_array,
            day_data.get('snow'), day_data.get('snowdepth'),
            day_data.get('windgust'), day_data.get('pressure'),
            day_data.get('cloudcover'), day_data.get('visibility'),
            day_data.get('solarradiation'), day_data.get('solarenergy'),
            day_data.get('severerisk', 0), day_data.get('moonphase'),
            day_data.get('conditions'), stations_array, day_data.get('source'),
            location_point
        )
    except Exception as e:
        ERR(f"Failed to prepare database query in store_weather_to_db! {e}")
        # Nothing can be inserted without the parameters; stop here instead
        # of running into an undefined name below.
        return None

    try:
        daily_weather_query = '''
            INSERT INTO DailyWeather (
                sunrise, sunriseEpoch, sunset, sunsetEpoch, description,
                tempmax, tempmin, uvindex, winddir, windspeed, icon, last_updated,
                datetime, datetimeEpoch, temp, feelslikemax, feelslikemin, feelslike,
                dew, humidity, precip, precipprob, precipcover, preciptype,
                snow, snowdepth, windgust, pressure, cloudcover, visibility,
                solarradiation, solarenergy, severerisk, moonphase, conditions,
                stations, source, location
            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38)
            RETURNING id
        '''

        # The statement is the same for every hour, so build it once.
        hourly_weather_query = '''
            INSERT INTO HourlyWeather (daily_weather_id, datetime, datetimeEpoch, temp, feelslike, humidity, dew, precip, precipprob,
            preciptype, snow, snowdepth, windgust, windspeed, winddir, pressure, cloudcover, visibility, solarradiation, solarenergy,
            uvindex, severerisk, conditions, icon, stations, source)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26)
            RETURNING id
        '''

        # Execute the query to insert daily weather data
        async with conn.transaction():
            daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params)

        if 'hours' in day_data:
            for hour_data in day_data['hours']:
                try:
                    # A coroutine does nothing until awaited; await the pause
                    # so it actually takes effect between inserts.
                    await asyncio.sleep(0.1)
                    hour_timestamp = date_str + ' ' + hour_data['datetime']
                    hour_data['datetime'] = localize_dt(hour_timestamp)
                    DEBUG(f"Processing hours now...")
                    DEBUG(f"Processing {hour_data['datetime']}")

                    hour_preciptype_array = hour_data.get('preciptype', []) or []
                    hour_stations_array = hour_data.get('stations', []) or []
                    hourly_weather_params = (
                        daily_weather_id,
                        hour_data['datetime'],
                        hour_data.get('datetimeEpoch'),
                        hour_data['temp'],
                        hour_data['feelslike'],
                        hour_data['humidity'],
                        hour_data['dew'],
                        hour_data['precip'],
                        hour_data['precipprob'],
                        hour_preciptype_array,
                        hour_data['snow'],
                        hour_data['snowdepth'],
                        hour_data['windgust'],
                        hour_data['windspeed'],
                        hour_data['winddir'],
                        hour_data['pressure'],
                        hour_data['cloudcover'],
                        hour_data['visibility'],
                        hour_data['solarradiation'],
                        hour_data['solarenergy'],
                        hour_data['uvindex'],
                        hour_data.get('severerisk', 0),
                        hour_data['conditions'],
                        hour_data['icon'],
                        hour_stations_array,
                        hour_data.get('source', ''),
                    )

                    try:
                        # Execute the query to insert hourly weather data
                        async with conn.transaction():
                            await conn.fetchval(hourly_weather_query, *hourly_weather_params)
                    except Exception as e:
                        ERR(f"EXCEPTION: {e}")

                except Exception as e:
                    # One bad hour must not prevent the remaining hours from
                    # being stored.
                    ERR(f"EXCEPTION: {e}")

        return "SUCCESS"

    except Exception as e:
        ERR(f"Error in dailyweather storage: {e}")
|
||||
|
||||
|
||||
|
||||
async def get_weather_from_db(date_time: datetime, latitude: float, longitude: float):
    """Look up the stored weather for a date near a location.

    Selects the DailyWeather row for the calendar date of *date_time* that
    lies within the ST_DWithin radius of the point and is nearest to it, then
    loads the HourlyWeather rows that reference it.

    Returns:
        ``{'DailyWeather': {...}, 'HourlyWeather': [{...}, ...]}``, or
        ``None`` when no daily row matches or the query fails (failures are
        logged).
    """
    conn = await get_db_connection()

    query_date = date_time.date()
    try:
        # Query to get daily weather data
        # NOTE(review): 8046.72 is presumably a radius in metres (about
        # 5 miles) -- confirm against the PostGIS geography semantics.
        query = '''
            SELECT DW.* FROM DailyWeather DW
            WHERE DW.datetime::date = $1
            AND ST_DWithin(DW.location::geography, ST_MakePoint($2,$3)::geography, 8046.72)
            ORDER BY ST_Distance(DW.location, ST_MakePoint($4, $5)::geography) ASC
            LIMIT 1
        '''

        # ST_MakePoint takes (x, y), hence longitude before latitude.
        daily_weather_data = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude)

        if daily_weather_data is None:
            DEBUG(f"No daily weather data retrieved from database.")
            return None
        else:
            DEBUG(f"Daily_weather_data: {daily_weather_data}")
            # Query to get hourly weather data
            query = '''
                SELECT HW.* FROM HourlyWeather HW
                WHERE HW.daily_weather_id = $1
            '''
            hourly_weather_data = await conn.fetch(query, daily_weather_data['id'])

            # Convert the database records into plain dicts for the caller.
            day: Dict = {
                'DailyWeather': dict(daily_weather_data),
                'HourlyWeather': [dict(row) for row in hourly_weather_data],
            }
            DEBUG(f"day: {day}")
            return day
    except Exception as e:
        # Falls through and returns None implicitly.
        ERR(f"Unexpected error occurred: {e}")
|
||||
|
||||
|
52
sijapi/tztest.py
Normal file
52
sijapi/tztest.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
import os
|
||||
import pandas as pd
|
||||
from typing import Optional
|
||||
from scipy.spatial import cKDTree
|
||||
import numpy as np
|
||||
from dotenv import load_dotenv
|
||||
from dateutil import tz
|
||||
from pathlib import Path
|
||||
|
||||
# Paths are resolved relative to the directory containing this file.
BASE_DIR = Path(__file__).resolve().parent
CONFIG_DIR = BASE_DIR / "config"
ENV_PATH = CONFIG_DIR / ".env"
DATA_DIR = BASE_DIR / "data"
GEONAMES_TXT = DATA_DIR / "geonames.txt"

# Load environment variables from config/.env.
load_dotenv(ENV_PATH)
|
||||
|
||||
def load_geonames_data(path: str):
    """Read a header-less, tab-separated GeoNames dump into a DataFrame.

    The 19 columns are named in file order, from ``geonameid`` through
    ``modification_date``.
    """
    column_names = [
        'geonameid', 'name', 'asciiname', 'alternatenames',
        'latitude', 'longitude', 'feature_class', 'feature_code',
        'country_code', 'cc2', 'admin1_code', 'admin2_code', 'admin3_code',
        'admin4_code', 'population', 'elevation', 'dem', 'timezone', 'modification_date',
    ]
    read_options = {
        'sep': '\t',
        'header': None,
        'names': column_names,
        'low_memory': False,
    }
    return pd.read_csv(path, **read_options)
|
||||
|
||||
|
||||
def find_timezone(data: pd.DataFrame, lat: float, lon: float, country_code: Optional[str] = None):
    """Return the ``timezone`` value of the row nearest to (*lat*, *lon*).

    When *country_code* is given, only rows with that ``country_code`` are
    considered. Nearness is measured by a KD-tree over the raw
    latitude/longitude columns.
    """
    candidates = data[data['country_code'] == country_code] if country_code else data

    # Index the candidate coordinates, then look up the single closest row.
    tree = cKDTree(candidates[['latitude', 'longitude']].values)
    _, nearest = tree.query([(lat, lon)], k=1)

    return candidates.iloc[nearest]['timezone'].values[0]
|
||||
|
||||
# Manual check, executed whenever this file is run or imported: load the
# GeoNames dump and print the timezone found for (42.5, 1.5) among "AD" rows.
df = load_geonames_data(GEONAMES_TXT)
timezone = find_timezone(df, 42.5, 1.5, 'AD')
print(timezone)
|
427
sijapi/utilities.py
Normal file
427
sijapi/utilities.py
Normal file
|
@ -0,0 +1,427 @@
|
|||
import re
|
||||
import os
|
||||
from fastapi import Form
|
||||
import re
|
||||
import io
|
||||
from io import BytesIO
|
||||
import base64
|
||||
import math
|
||||
from dateutil import parser
|
||||
from pathlib import Path
|
||||
import filetype
|
||||
from PyPDF2 import PdfReader
|
||||
from pdfminer.high_level import extract_text as pdfminer_extract_text
|
||||
import pytesseract
|
||||
from pdf2image import convert_from_path
|
||||
from datetime import datetime, date, time
|
||||
from typing import Optional, Union, Tuple
|
||||
import asyncio
|
||||
from PIL import Image
|
||||
from dateutil.parser import parse as dateutil_parse
|
||||
from docx import Document
|
||||
import asyncpg
|
||||
from sshtunnel import SSHTunnelForwarder
|
||||
from fastapi import Depends, HTTPException, Request, UploadFile
|
||||
from fastapi.security.api_key import APIKeyHeader
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import DB, GLOBAL_API_KEY, DB, DB_HOST, DB_PORT, DB_USER, DB_PASS, TZ, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR
|
||||
|
||||
# Security scheme that reads the API key from the "Authorization" request header.
api_key_header = APIKeyHeader(name="Authorization")
|
||||
|
||||
def validate_api_key(request: Request, api_key: str = Depends(api_key_header)):
    """Reject the request unless it carries the global API key.

    The key is accepted from the ``Authorization`` header (with or without a
    "Bearer " prefix) or from the ``api_key`` query parameter. The paths
    /health, /ip and /pgp are exempt.

    Raises:
        HTTPException: 401 when neither source supplies the expected key.
    """
    if request.url.path in ["/health", "/ip", "/pgp"]:
        return

    accepted = {request.query_params.get("api_key")}

    # Test the received value, not the security-scheme object (which is
    # always truthy), so a missing header cannot crash on ``None``.
    if api_key:
        token = api_key.strip()
        if token[:7].lower() == "bearer ":
            token = token[7:].strip()
        # Compare the key with its case intact so keys containing upper-case
        # characters can authenticate through the header.
        accepted.add(token)
        # Earlier behaviour lower-cased the whole header before comparing;
        # keep accepting that form so existing clients are not locked out.
        accepted.add(api_key.lower().split("bearer ")[-1])

    if GLOBAL_API_KEY not in accepted:
        raise HTTPException(status_code=401, detail="Invalid or missing API key")
|
||||
|
||||
|
||||
def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str = None, extension: str = None, no_timestamp: bool = False) -> Tuple[Path, Path]:
    '''
    Obsidian helper: build the journal path for a given datetime.

    With only ``date_time`` the result is the daily note
    ``journal/<year>/<month>/<day>/<day>.md``. Otherwise an optional
    date-prefixed subdirectory and an optional date- (and, unless
    ``no_timestamp`` is set, time-) prefixed, sanitized filename are appended;
    ``extension`` is added to the filename when it is not already present.

    The parent directory of the resulting path is created inside the vault.
    Returns ``(absolute_path, relative_path)``.
    '''
    day = date_time.strftime(DAY_FMT)
    day_short = date_time.strftime(DAY_SHORT_FMT)
    relative_path = (
        Path("journal")
        / date_time.strftime(YEAR_FMT)
        / date_time.strftime(MONTH_FMT)
        / day
    )

    if subdir or filename or extension:
        if subdir:
            # datestamped subdirectory
            relative_path /= f"{day_short} {subdir}"

        if filename:
            name_parts = [day_short]
            if not no_timestamp:
                name_parts.append(date_time.strftime("%H%M%S"))
            name_parts.append(sanitize_filename(filename))
            filename = " ".join(name_parts)

            if extension:
                if not extension.startswith("."):
                    extension = f".{extension}"
                if not filename.endswith(extension):
                    filename = f"{filename}{extension}"

            relative_path /= filename
    else:
        # only the date was given: this is the standard daily note
        relative_path /= f"{day}.md"

    absolute_path = OBSIDIAN_VAULT_DIR / relative_path
    os.makedirs(absolute_path.parent, exist_ok=True)
    return absolute_path, relative_path
|
||||
|
||||
def prefix_lines(text: str, prefix: str = '> ') -> str:
    """Return *text* with every line left-stripped and prefixed by *prefix*."""
    return '\n'.join(prefix + row.lstrip() for row in text.split('\n'))
|
||||
|
||||
def f(file):
    """Normalize *file* into something readable.

    File-like objects (anything with a callable ``read``) and raw
    ``bytes``/``bytearray`` values are returned unchanged. A ``str`` or
    ``Path`` is opened in binary mode and the open handle is returned.

    The caller owns the returned handle and is responsible for closing it.

    Raises:
        TypeError: if *file* is none of the supported kinds.
    """
    if hasattr(file, 'read') and callable(file.read):
        return file
    if isinstance(file, (bytes, bytearray)):
        return file

    if isinstance(file, Path):
        file_path = file
    elif isinstance(file, str):
        file_path = Path(file)
    else:
        raise TypeError("Invalid file type. Expected str, Path, or file-like object.")

    # Deliberately not a `with` block: returning from inside one closes the
    # file before the caller can read it.
    return open(file_path, 'rb')
|
||||
|
||||
|
||||
def get_extension(file):
    """Return the suffix (including the dot) of a path or upload.

    Accepts a ``str`` path, a ``Path``, or any object exposing a ``filename``
    attribute. Any failure is logged and re-raised.
    """
    try:
        if isinstance(file, Path):
            return file.suffix
        raw_name = file if isinstance(file, str) else file.filename
        return Path(raw_name).suffix
    except Exception as e:
        ERR(f"Unable to get extension of {file}")
        raise e
|
||||
|
||||
|
||||
|
||||
def sanitize_filename(text, max_length=255):
    """Return *text* reduced to filename-safe characters.

    Everything except word characters, whitespace, dots and hyphens is
    removed, surrounding whitespace is stripped, and the result is truncated
    to *max_length* characters. The value is logged before and after.
    """
    DEBUG(f"Filename before sanitization: {text}")
    final_filename = re.sub(r'[^\w\s\.-]', '', text).strip()[:max_length]
    DEBUG(f"Filename after sanitization: {final_filename}")
    return final_filename
|
||||
|
||||
def bool_convert(value: str = Form(None)):
    """Interpret a form field as a boolean.

    Returns True for "true", "1", "t", "y" or "yes" (case-insensitive).
    A missing field (``None``) or any other non-string value yields False
    instead of raising ``AttributeError``; an actual ``bool`` is returned
    unchanged.
    """
    if isinstance(value, bool):
        return value
    if not isinstance(value, str):
        # The declared default is None, which has no .lower().
        return False
    return value.lower() in ["true", "1", "t", "y", "yes"]
|
||||
|
||||
|
||||
def str_to_bool(value: str) -> bool:
    """
    Convert a string to a boolean.
    Interprets 'true', '1', 'yes', 'y' as True (case-insensitive, surrounding
    whitespace ignored).
    Interprets 'false', '0', 'no', 'n', '', or any other string as False.
    Non-string input falls back to its ordinary truthiness.
    """
    if not isinstance(value, str):
        return bool(value)
    return value.strip().lower() in ("true", "1", "yes", "y")
|
||||
|
||||
def get_timestamp():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    current = datetime.now()
    return f"{current:%Y-%m-%d %H:%M:%S}"
|
||||
|
||||
|
||||
async def extract_text(file_path: str) -> str:
    """Extract text from a ``.pdf`` or ``.docx`` file.

    Dispatches on the filename suffix; any other suffix yields ``None``.
    """
    if file_path.endswith('.pdf'):
        return await extract_text_from_pdf(file_path)
    if file_path.endswith('.docx'):
        return await extract_text_from_docx(file_path)
    return None
|
||||
|
||||
|
||||
def clean_text(text):
    """Normalize extracted text.

    Removes every hyphen, collapses each whitespace run to a single space,
    drops zero-width characters (U+200B-U+200D, U+FEFF) and strips the ends.
    """
    without_hyphens = text.replace('-', '')
    single_spaced = re.sub(r'\s+', ' ', without_hyphens)
    visible_only = re.sub(r'[\u200B-\u200D\uFEFF]', '', single_spaced)
    return visible_only.strip()
|
||||
|
||||
|
||||
async def ocr_pdf(file_path: str) -> str:
    """OCR a PDF and return the recognized text of all pages joined by spaces.

    Page rendering and each page's recognition run in worker threads. Any
    failure is logged and an empty string is returned.
    """
    try:
        pages = await asyncio.to_thread(convert_from_path, file_path)
        recognition_jobs = [
            asyncio.to_thread(pytesseract.image_to_string, page) for page in pages
        ]
        recognized = await asyncio.gather(*recognition_jobs)
        return ' '.join(recognized)
    except Exception as e:
        ERR(f"Error during OCR: {str(e)}")
        return ""
|
||||
|
||||
|
||||
async def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from a PDF, trying PyPDF2, then pdfminer.six, then OCR.

    Each extractor's output is accepted only if ``should_use_ocr`` judges it
    sufficient for the page count; otherwise the next method is tried.
    Returns an empty string for files that are not valid PDFs.

    NOTE: a second ``extract_text_from_pdf`` is defined further down in this
    module; being later, that definition is the one bound to the name at
    import time.
    """
    if not await is_valid_pdf(file_path):
        ERR(f"Invalid PDF file: {file_path}")
        return ""

    text = ''
    num_pages = 0

    # First, attempt to extract text using PyPDF2
    try:
        reader = await asyncio.to_thread(PdfReader, file_path)
        for page in reader.pages:
            # Extract once per page; the call is expensive.
            page_text = page.extract_text()
            if page_text:
                text += page_text + ' '
        num_pages = len(reader.pages)

        # If text was extracted successfully and it's deemed sufficient, return it
        if text and not should_use_ocr(text, num_pages):
            return clean_text(text)
    except Exception as e:
        ERR(f"Error extracting text with PyPDF2: {str(e)}")

    # If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six
    try:
        text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
        if num_pages:
            sufficient = bool(text_pdfminer) and not should_use_ocr(text_pdfminer, num_pages)
        else:
            # PyPDF2 failed before the page count was known; a per-page
            # average cannot be computed, so accept any non-blank text.
            sufficient = bool(text_pdfminer and text_pdfminer.strip())
        if sufficient:
            return clean_text(text_pdfminer)
    except Exception as e:
        ERR(f"Error extracting text with pdfminer.six: {e}")

    # If both methods fail or are deemed insufficient, use OCR as the last resort
    INFO("Falling back to OCR for text extraction...")
    return await ocr_pdf(file_path)
|
||||
|
||||
async def is_valid_pdf(file_path: str) -> bool:
    """Check if the file at file_path is a valid PDF.

    Returns True only when ``filetype.guess`` identifies the file as
    ``application/pdf``. An unrecognized file type (``None`` from the guess)
    is simply not a PDF; only genuine failures are logged.
    """
    try:
        kind = filetype.guess(file_path)
    except Exception as e:
        ERR(f"Error checking file type: {e}")
        return False
    return kind is not None and kind.mime == 'application/pdf'
|
||||
|
||||
async def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from a PDF, trying PyPDF2, then pdfminer.six, then OCR.

    The first method that yields non-blank text wins. Returns an empty string
    when the file is not a valid PDF or when every method fails.
    """
    if not await is_valid_pdf(file_path):
        WARN(f"Invalid PDF file: {file_path}")
        return ""

    text = ''
    try:
        reader = await asyncio.to_thread(PdfReader, file_path)
        for page in reader.pages:
            # Extract once per page; the call is expensive.
            page_text = page.extract_text()
            if page_text:
                text += page_text + ' '
        if text.strip():  # Successfully extracted text
            return clean_text(text)
    except Exception as e:
        ERR(f"Error extracting text with PyPDF2: {str(e)}")

    try:
        text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
        if text_pdfminer.strip():  # Successfully extracted text
            return clean_text(text_pdfminer)
    except Exception as e:
        ERR(f"Error extracting text with pdfminer.six: {str(e)}")

    # Fall back to OCR
    INFO("Falling back to OCR for text extraction...")
    try:
        # Rendering pages is blocking work; keep it off the event loop like
        # the other extraction steps.
        images = await asyncio.to_thread(convert_from_path, file_path)
        ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images))
        return ' '.join(ocr_texts).strip()
    except Exception as e:
        WARN(f"OCR failed: {str(e)}")
        return ""
|
||||
|
||||
async def extract_text_from_docx(file_path: str) -> str:
    """Return the paragraphs of a .docx file joined by newlines.

    The document is parsed in a worker thread.
    """
    def _paragraph_text(docx_path):
        document = Document(docx_path)
        return '\n'.join(para.text for para in document.paragraphs)

    return await asyncio.to_thread(_paragraph_text, file_path)
|
||||
|
||||
# Correcting read_text_file to be asynchronous
|
||||
async def read_text_file(file_path: str) -> str:
    """Read a UTF-8 text file without blocking the event loop.

    The synchronous read is delegated to ``_sync_read_text_file`` in a
    worker thread.
    """
    contents = await asyncio.to_thread(_sync_read_text_file, file_path)
    return contents
|
||||
|
||||
def _sync_read_text_file(file_path: str) -> str:
|
||||
# Actual synchronous file reading operation
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
return file.read()
|
||||
|
||||
def should_use_ocr(text, num_pages) -> bool:
    """Decide whether extracted *text* is too sparse to trust.

    Returns True when no text was extracted or when the average is below
    10 words per page. A missing or non-positive page count is treated as a
    single page so the check never divides by zero.
    """
    if not text:
        return True  # No text was extracted, use OCR
    word_count = len(text.split())
    pages = num_pages if num_pages and num_pages > 0 else 1
    avg_words_per_page = word_count / pages
    return avg_words_per_page < 10
|
||||
|
||||
|
||||
def convert_to_unix_time(iso_date_str):
    """Parse a date string with dateutil and return its POSIX timestamp as an int."""
    parsed = parser.parse(iso_date_str)  # dateutil handles an embedded timezone offset
    seconds_since_epoch = parsed.timestamp()
    return int(seconds_since_epoch)
|
||||
|
||||
|
||||
async def get_db_connection():
    """Open and return a new asyncpg connection using the module's DB_* settings."""
    connection_settings = {
        "database": DB,
        "user": DB_USER,
        "password": DB_PASS,
        "host": DB_HOST,
        "port": DB_PORT,
    }
    return await asyncpg.connect(**connection_settings)
|
||||
|
||||
temp = """
|
||||
def get_db_connection_ssh(ssh: bool = True):
|
||||
if ssh:
|
||||
with SSHTunnelForwarder(
|
||||
(DB_SSH, 22),
|
||||
DB_SSH_USER=DB_SSH_USER,
|
||||
DB_SSH_PASS=DB_SSH_PASS,
|
||||
remote_bind_address=DB_SSH,
|
||||
local_bind_address=(DB_HOST, DB_PORT)
|
||||
) as tunnel: conn = psycopg2.connect(
|
||||
dbname=DB,
|
||||
user=DB_USER,
|
||||
password=DB_PASS,
|
||||
host=DB_HOST,
|
||||
port=DB_PORT
|
||||
)
|
||||
else:
|
||||
conn = psycopg2.connect(
|
||||
dbname=DB,
|
||||
user=DB_USER,
|
||||
password=DB_PASS,
|
||||
host=DB_HOST,
|
||||
port=DB_PORT
|
||||
)
|
||||
|
||||
return conn
|
||||
"""
|
||||
|
||||
def db_localized():
    """Return the result of calling ``get_db_connection()``.

    ``get_db_connection`` is a coroutine function, so what is returned here
    is the un-awaited coroutine; the caller must await it.
    """
    # ssh = True if TS_IP == DB_SSH else False
    pending_connection = get_db_connection()
    return pending_connection
|
||||
|
||||
|
||||
def haversine(lat1, lon1, lat2, lon2):
    """Calculate the great circle distance between two points on the earth.

    All four arguments are in decimal degrees; the result is in kilometers,
    using an Earth radius of 6371 km.
    """
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt/asin raise a domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * math.asin(math.sqrt(a))
    r = 6371  # Radius of Earth in kilometers
    return c * r
|
||||
|
||||
|
||||
|
||||
def convert_degrees_to_cardinal(d):
    """
    Map a bearing in degrees to the nearest of the 16 compass points.
    """
    compass = ("N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW")
    sector_width = 360. / len(compass)
    sector = round(d / sector_width)
    # wrap so that e.g. 360 degrees maps back to "N"
    return compass[sector % len(compass)]
|
||||
|
||||
|
||||
def localize_dt(dt):
    """Return *dt* as a timezone-aware datetime.

    Strings are parsed with dateutil first. A naive datetime gets ``TZ``
    attached via ``replace``; an aware datetime is returned unchanged.

    Raises:
        TypeError: for any other input type, or when parsing fails.
    """
    original_input = dt
    try:
        if isinstance(dt, str):
            dt = dateutil_parse(dt)
            DEBUG(f"{original_input} was a string so we attempted converting to datetime. Result: {dt}")

        if not isinstance(dt, datetime):
            ERR(f"Conversion failed")
            raise TypeError("Conversion failed")

        DEBUG(f"{dt} is a datetime object, so we will ensure it is tz-aware.")
        return dt if dt.tzinfo is not None else dt.replace(tzinfo=TZ)
    except Exception as e:
        ERR(f"Error parsing datetime: {e}")
        raise TypeError("Input must be a string or datetime object")
|
||||
|
||||
|
||||
# Column label -> "HH:MM:SS" for the two-hourly slots used by hourly data.
HOURLY_COLUMNS_MAPPING = {
    "12am": "00:00:00",
    "2am": "02:00:00",
    "4am": "04:00:00",
    "6am": "06:00:00",
    "8am": "08:00:00",
    "10am": "10:00:00",
    "12pm": "12:00:00",
    "2pm": "14:00:00",
    "4pm": "16:00:00",
    "6pm": "18:00:00",
    "8pm": "20:00:00",
    "10pm": "22:00:00",
}


def convert_to_12_hour_format(datetime_obj_or_str):
    """Map a time to its ``HOURLY_COLUMNS_MAPPING`` label (e.g. "2pm").

    Accepts a ``datetime``/``time`` object, or a string in either
    ``%Y-%m-%d %H:%M:%S`` or ``%H:%M:%S`` format.

    Returns the label, ``"Invalid time"`` when the time of day is not one of
    the mapped slots, or ``"Invalid datetime string format"`` when a string
    matches neither format.
    """
    if isinstance(datetime_obj_or_str, str):
        for fmt in ("%Y-%m-%d %H:%M:%S", "%H:%M:%S"):
            try:
                parsed = datetime.strptime(datetime_obj_or_str, fmt)
                break
            except ValueError:
                continue
        else:
            return "Invalid datetime string format"
    else:
        parsed = datetime_obj_or_str

    # Always look up the *parsed* time of day; looking up the raw input string
    # would reject every full "date time" string that was parsed above.
    time24 = parsed.strftime("%H:%M:%S")

    reverse_mapping = {v: k for k, v in HOURLY_COLUMNS_MAPPING.items()}
    return reverse_mapping.get(time24, "Invalid time")
|
||||
|
||||
|
||||
def encode_image_to_base64(image_path):
    """Re-encode the image at *image_path* as JPEG and return it as base64 text.

    Returns ``None`` (after logging) when the file does not exist.
    """
    if not os.path.exists(image_path):
        DEBUG(f"Error: File does not exist at {image_path}")
        return None

    with Image.open(image_path) as image:
        # JPEG cannot store alpha or palette modes; saving such an image
        # raises OSError unless it is converted first.
        if image.mode not in ("RGB", "L", "CMYK"):
            image = image.convert("RGB")
        output_buffer = BytesIO()
        image.save(output_buffer, format='JPEG')
        byte_data = output_buffer.getvalue()
        base64_str = base64.b64encode(byte_data).decode('utf-8')
        return base64_str
|
||||
|
||||
def resize_and_convert_image(image_path, max_size=2160, quality=80):
    """Scale an image so its longest side equals *max_size* and return JPEG bytes.

    The aspect ratio is preserved. Note that images smaller than *max_size*
    are scaled up as well. *quality* is passed to the JPEG encoder.
    """
    with Image.open(image_path) as img:
        # Resize image
        ratio = max_size / max(img.size)
        # Clamp to 1px so an extreme aspect ratio cannot round a side to 0.
        new_size = tuple(max(1, int(x * ratio)) for x in img.size)
        img = img.resize(new_size, Image.Resampling.LANCZOS)

        # JPEG cannot store alpha or palette modes; convert before saving.
        if img.mode not in ("RGB", "L", "CMYK"):
            img = img.convert("RGB")

        # Convert to jpg
        img_byte_arr = io.BytesIO()
        img.save(img_byte_arr, format='JPEG', quality=quality)
        img_byte_arr = img_byte_arr.getvalue()

    return img_byte_arr
|
247
tests/__init__2.py
Normal file
247
tests/__init__2.py
Normal file
|
@ -0,0 +1,247 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
import ipaddress
|
||||
from datetime import datetime, timedelta
|
||||
from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
from O365 import Account, FileSystemTokenBackend
|
||||
import logging
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from colorama import Fore, Style, init
|
||||
|
||||
|
||||
|
||||
init(autoreset=True)
|
||||
class ColorFormatter(logging.Formatter):
    """Custom formatter to add colors to log levels."""

    # Level number -> colorama foreground colour for the level name.
    COLOR_MAP = {
        logging.DEBUG: Fore.GREEN,
        logging.INFO: Fore.LIGHTBLUE_EX,
        logging.WARNING: Fore.YELLOW,
        logging.ERROR: Fore.RED,
        logging.CRITICAL: Fore.MAGENTA,
    }

    def format(self, record):
        """Format *record* with its level name wrapped in colour codes.

        The record is shared between all handlers of a logger, so the
        original ``levelname`` is restored afterwards; otherwise the escape
        codes would leak into every other handler's output (e.g. a log file).
        """
        color = self.COLOR_MAP.get(record.levelno, Fore.WHITE)
        plain_levelname = record.levelname
        record.levelname = f"{color}{plain_levelname}{Style.RESET_ALL}"
        try:
            return super().format(record)
        finally:
            record.levelname = plain_levelname
|
||||
|
||||
|
||||
def setup_logger(log_to_file: bool = False):
    """Configure and return the shared 'LOGGER' logger.

    A colourised console handler is always attached. The rotating file
    handler is opt-in via *log_to_file* (the default matches the previous
    effective behaviour, where the file handler was never added).

    The log directory is resolved locally rather than read from the
    module-level ``LOGS_DIR``: this function is called before that constant is
    assigned, so referencing it here raised ``NameError`` at import time.
    """
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

    logger = logging.getLogger('LOGGER')
    logger.setLevel(logging.INFO)
    if logger.handlers:
        # Already configured; adding handlers again would duplicate output.
        return logger

    console_handler = logging.StreamHandler()
    console_handler.setFormatter(ColorFormatter(log_format))
    logger.addHandler(console_handler)

    if log_to_file:
        logs_dir = Path(__file__).resolve().parent / "logs"
        os.makedirs(logs_dir, exist_ok=True)
        handler = RotatingFileHandler(f'{logs_dir}/app.log', maxBytes=2000000, backupCount=10)
        handler.setFormatter(logging.Formatter(log_format))
        logger.addHandler(handler)

    return logger
|
||||
|
||||
LOGGER = setup_logger()
|
||||
|
||||
|
||||
### Initial initialization
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
CONFIG_DIR = BASE_DIR / "config"
|
||||
ENV_PATH = CONFIG_DIR / ".env"
|
||||
load_dotenv(ENV_PATH)
|
||||
|
||||
|
||||
### API essentials
|
||||
ROUTERS = os.getenv('ROUTERS', '').split(',')
|
||||
PUBLIC_SERVICES = os.getenv('PUBLIC_SERVICES', '').split(',')
|
||||
GLOBAL_API_KEY = os.getenv("GLOBAL_API_KEY")
|
||||
HOST_NET = os.getenv("HOST_NET", "127.0.0.1")
|
||||
HOST_PORT = int(os.getenv("HOST_PORT", 4444))
|
||||
HOST = f"{HOST_NET}:{HOST_PORT}"
|
||||
LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
|
||||
TRUSTED_SUBNETS = [ipaddress.ip_network(subnet.strip()) for subnet in os.getenv('TRUSTED_SUBNETS', '127.0.0.1/32').split(',')]
|
||||
|
||||
|
||||
### Directories & general paths
|
||||
HOME_DIR = Path.home()
|
||||
ROUTER_DIR = BASE_DIR / "routers"
|
||||
DATA_DIR = BASE_DIR / "data"
|
||||
os.makedirs(DATA_DIR, exist_ok=True)
|
||||
ALERTS_DIR = DATA_DIR / "alerts"
|
||||
os.makedirs(ALERTS_DIR, exist_ok=True)
|
||||
LOGS_DIR = BASE_DIR / "logs"
|
||||
os.makedirs(LOGS_DIR, exist_ok=True)
|
||||
REQUESTS_DIR = LOGS_DIR / "requests"
|
||||
os.makedirs(REQUESTS_DIR, exist_ok=True)
|
||||
REQUESTS_LOG_PATH = LOGS_DIR / "requests.log"
|
||||
|
||||
|
||||
### Docs & images
|
||||
DOC_DIR = DATA_DIR / "docs"
|
||||
os.makedirs(DOC_DIR, exist_ok=True)
|
||||
SD_IMAGE_DIR = DATA_DIR / "sd" / "images"
|
||||
os.makedirs(SD_IMAGE_DIR, exist_ok=True)
|
||||
|
||||
|
||||
### Obsidian & notes
|
||||
OBSIDIAN_VAULT_DIR = Path(os.getenv("OBSIDIAN_BASE_DIR") or HOME_DIR / "Nextcloud" / "notes")
|
||||
OBSIDIAN_JOURNAL_DIR = OBSIDIAN_VAULT_DIR / "journal"
|
||||
OBSIDIAN_RESOURCES_DIR = "obsidian/resources"
|
||||
OBSIDIAN_BANNER_DIR = OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR / "banners"
|
||||
os.makedirs(Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR, exist_ok=True)
|
||||
|
||||
|
||||
### Database
DB = os.getenv("DB", 'sijdb')
DB_HOST = os.getenv("DB_HOST", "127.0.0.1")
# Always an int, whether it comes from the environment (a string) or the default.
DB_PORT = int(os.getenv("DB_PORT", 5432))
DB_USER = os.getenv("DB_USER", 'sij')
DB_PASS = os.getenv("DB_PASS")
DB_SSH = os.getenv("DB_SSH", "100.64.64.15")
DB_SSH_USER = os.getenv("DB_SSH_USER")
# Prefer the variable matching the setting's name; the legacy DB_SSH_ENV
# name is still honoured so existing .env files keep working.
DB_SSH_PASS = os.getenv("DB_SSH_PASS") or os.getenv("DB_SSH_ENV")
DB_URL = f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB}'
|
||||
|
||||
|
||||
### Large language model
|
||||
LLM_URL = os.getenv("LLM_URL", "http://localhost:11434")
|
||||
LLM_SYS_MSG = os.getenv("SYSTEM_MSG", "You are a helpful AI assistant.")
|
||||
SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
|
||||
SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
|
||||
DEFAULT_LLM = os.getenv("DEFAULT_LLM", "dolphin-mistral")
|
||||
DEFAULT_VISION = os.getenv("DEFAULT_VISION", "llava")
|
||||
DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", 'joanne')
|
||||
|
||||
|
||||
### Stable diffusion
SD_WORKFLOWS_DIR = DATA_DIR / "sd" / "workflows"
os.makedirs(SD_WORKFLOWS_DIR, exist_ok=True)
COMFYUI_URL = os.getenv('COMFYUI_URL', "http://localhost:8188")
# COMFYUI_DIR is optional: Path(None) raises TypeError, which would abort the
# whole module import when the variable is not set.
_comfyui_dir = os.getenv('COMFYUI_DIR')
COMFYUI_DIR = Path(_comfyui_dir) if _comfyui_dir else None
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output' if COMFYUI_DIR else None
|
||||
|
||||
|
||||
### Summarization
|
||||
SUMMARY_CHUNK_SIZE = int(os.getenv("SUMMARY_CHUNK_SIZE", 4000)) # measured in tokens
|
||||
SUMMARY_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", 100)) # measured in tokens
|
||||
SUMMARY_TPW = float(os.getenv("SUMMARY_TPW", 1.3)) # measured in tokens
|
||||
SUMMARY_LENGTH_RATIO = int(os.getenv("SUMMARY_LENGTH_RATIO", 4)) # measured as original to length ratio
|
||||
SUMMARY_MIN_LENGTH = int(os.getenv("SUMMARY_MIN_LENGTH", 150)) # measured in tokens
|
||||
SUMMARY_INSTRUCT = os.getenv("SUMMARY_INSTRUCT", "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
|
||||
SUMMARY_MODEL = os.getenv("SUMMARY_MODEL", "llama3")
|
||||
SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 4096))
|
||||
|
||||
|
||||
### ASR & TTS
|
||||
ASR_DIR = DATA_DIR / "asr"
|
||||
os.makedirs(ASR_DIR, exist_ok=True)
|
||||
WHISPER_CPP_MODELS = os.getenv('WHISPER_CPP_MODELS', 'small,base,base-en,tiny,medium,medium-en,large,large-v2,large-v3').split(',')
|
||||
TTS_DIR = DATA_DIR / "tts"
|
||||
os.makedirs(TTS_DIR, exist_ok=True)
|
||||
VOICE_DIR = TTS_DIR / 'voices'
|
||||
os.makedirs(VOICE_DIR, exist_ok=True)
|
||||
PODCAST_DIR = TTS_DIR / "sideloads"
|
||||
os.makedirs(PODCAST_DIR, exist_ok=True)
|
||||
TTS_OUTPUT_DIR = TTS_DIR / 'outputs'
|
||||
os.makedirs(TTS_OUTPUT_DIR, exist_ok=True)
|
||||
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
|
||||
|
||||
|
||||
HOME_ZIP = os.getenv("HOME_ZIP")
|
||||
LOCATION_OVERRIDES = DATA_DIR / "loc_overrides.json"
|
||||
LOCATIONS_CSV = DATA_DIR / "US.csv"
|
||||
# DB = DATA_DIR / "weatherlocate.db" # deprecated
|
||||
VISUALCROSSING_API_KEY = os.getenv("VISUALCROSSING_API_KEY")
|
||||
|
||||
|
||||
### Calendar & email account
|
||||
MS365_TOGGLE = True if os.getenv("MS365_TOGGLE", "False") == "True" else False
|
||||
ICAL_TOGGLE = True if os.getenv("ICAL_TOGGLE", "False") == "True" else False
|
||||
ICS_PATH = DATA_DIR / 'calendar.ics' # deprecated now, but maybe revive?
|
||||
ICALENDARS = os.getenv('ICALENDARS', 'NULL,VOID').split(',')
|
||||
|
||||
IMAP_HOST = os.getenv('IMAP_HOST', '127.0.0.1')
|
||||
EMAIL_ADDRESS = os.getenv('EMAIL_ADDRESS')
|
||||
EMAIL_PASS = os.getenv('EMAIL_PASS')
|
||||
IMAP_PORT = int(os.getenv('IMAP_PORT', 1143))
|
||||
IMAP_ENCRYPTION = os.getenv('IMAP_ENCRYPTION', 'STARTTLS')
|
||||
SMTP_PORT = int(os.getenv('SMTP_PORT', 1025))
|
||||
SMTP_ENCRYPTION = os.getenv('SMTP_ENCRYPTION', 'SSL')
|
||||
PUBLIC_KEY = os.getenv('PUBLIC_KEY')
|
||||
|
||||
|
||||
### Courtlistener & other webhooks
|
||||
COURTLISTENER_DOCKETS_DIR = DATA_DIR / "courtlistener" / "dockets"
|
||||
os.makedirs(COURTLISTENER_DOCKETS_DIR, exist_ok=True)
|
||||
COURTLISTENER_SEARCH_DIR = DATA_DIR / "courtlistener" / "cases"
|
||||
os.makedirs(COURTLISTENER_SEARCH_DIR, exist_ok=True)
|
||||
CASETABLE_PATH = DATA_DIR / "courtlistener" / "cases.json"
|
||||
COURTLISTENER_API_KEY = os.getenv("COURTLISTENER_API_KEY")
|
||||
COURTLISTENER_BASE_URL = os.getenv("COURTLISTENER_BASE_URL", "https://www.courtlistener.com")
|
||||
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
|
||||
|
||||
### Passwords & API keys
|
||||
MAC_ID = os.getenv("MAC_ID")
|
||||
MAC_UN = os.getenv("MAC_UN")
|
||||
MAC_PW = os.getenv("MAC_PW")
|
||||
TIMING_API_KEY = os.getenv("TIMING_API_KEY")
|
||||
TIMING_API_URL = os.getenv("TIMING_API_URL", "https://web.timingapp.com/api/v1")
|
||||
PHOTOPRISM_URL = os.getenv("PHOTOPRISM_URL")
|
||||
PHOTOPRISM_USER = os.getenv("PHOTOPRISM_USER")
|
||||
PHOTOPRISM_PASS = os.getenv("PHOTOPRISM_PASS")
|
||||
|
||||
### Tailscale
# TS_IP is optional, like TS_SUBNET: ip_address("NULL") raises ValueError, so
# an unset (or placeholder "NULL") value must not be parsed.
_ts_ip = os.getenv("TS_IP")
TS_IP = ipaddress.ip_address(_ts_ip) if _ts_ip and _ts_ip != "NULL" else None
TS_SUBNET = ipaddress.ip_network(os.getenv("TS_SUBNET")) if os.getenv("TS_SUBNET") else None
TS_ID = os.getenv("TS_ID", "NULL")
TS_TAILNET = os.getenv("TS_TAILNET", "NULL")
TS_ADDRESS = f"http://{TS_ID}.{TS_TAILNET}.ts.net"
|
||||
|
||||
### Cloudflare
|
||||
CF_TOKEN = os.getenv("CF_TOKEN")
|
||||
CF_IP = DATA_DIR / "cf_ip.txt"
|
||||
CF_DOMAINS_PATH = DATA_DIR / "cf_domains.json"
|
||||
|
||||
### Caddy
|
||||
BASE_URL = os.getenv("BASE_URL")
|
||||
CADDY_SERVER = os.getenv('CADDY_SERVER', None)
|
||||
CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None
|
||||
CADDY_API_KEY = os.getenv("CADDY_API_KEY")
|
||||
|
||||
### Maintenance
|
||||
GARBAGE_COLLECTION_INTERVAL = 60 * 60 # Run cleanup every hour
|
||||
GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours
|
||||
|
||||
### Microsoft Graph
MS365_CLIENT_ID = os.getenv('MS365_CLIENT_ID')
MS365_SECRET = os.getenv('MS365_SECRET')
MS365_TENANT_ID = os.getenv('MS365_TENANT_ID')
MS365_CERT_PATH = CONFIG_DIR / 'MS365' / '.cert.pem' # deprecated
MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated
MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt'
MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT')
MS365_LOGIN_URL = os.getenv('MS365_LOGIN_URL', 'https://login.microsoftonline.com')
MS365_AUTHORITY_URL = f'{MS365_LOGIN_URL}/{MS365_TENANT_ID}'
MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect")
# Split AFTER reading the variable so the result is a list both when the
# value comes from the environment and when the default is used.
MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access').split(',') #["Calendars.Read", "Calendars.ReadWrite", "offline_access"]
# ["https://graph.microsoft.com/.default"] # OAUTH_SCOPES = os.getenv('OAUTH_SCOPES', 'basic,calendar').split(',')

# The key file is marked deprecated above; its absence must not abort the
# import of the whole configuration module.
try:
    with open(MS365_KEY_PATH, 'r') as private_key_file:
        MS365_KEY = private_key_file.read()
except FileNotFoundError:
    MS365_KEY = None
    LOGGER.warning(f"MS365 key file not found at {MS365_KEY_PATH}")

# MS365_TOKEN_BACKEND = FileSystemTokenBackend(token_path=MS365_TOKEN_PATH)
# MS365_CREDENTIALS = (MS365_CLIENT_ID, MS365_SECRET)
# MS365_ACCOUNT = Account(
#     credentials=MS365_CREDENTIALS,
#     token_backend=MS365_TOKEN_BACKEND,
#     tenant_id=MS365_TENANT_ID,
#     scopes=MS365_SCOPE,
#     auth_flow_type='authorization'
# )

LOGGER.critical(f"Visit https://api.sij.ai/o365/login")
|
189
tests/asr_faster.py
Normal file
189
tests/asr_faster.py
Normal file
|
@ -0,0 +1,189 @@
|
|||
from fastapi import APIRouter, HTTPException, UploadFile, Form, status, Response
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
from starlette.concurrency import run_in_threadpool
|
||||
from io import BytesIO
|
||||
from typing import Optional
|
||||
import torch
|
||||
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
||||
from sijapi import WHISPER_CPP_MODELS
|
||||
import torchaudio
|
||||
import ffmpeg
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
### INITIALIZATIONS ###
import logging

# Every function below logs through `logger`; without this definition each
# call fails with NameError.
logger = logging.getLogger(__name__)

asr = APIRouter(tags=["trusted", "private"])
# NOTE: this local set replaces the WHISPER_CPP_MODELS imported from sijapi above.
WHISPER_CPP_MODELS = {"small", "base", "base-en", "tiny", "medium", "medium-en", "large", "large-v2", "large-v3"}
|
||||
|
||||
|
||||
async def faster_whisper_mps(audio_bytes: bytes, subtitle: bool = False, model: str = "small", language: str = None, task: str = "transcribe") -> str:
    """Transcribe raw audio bytes with a Hugging Face Whisper checkpoint.

    The audio is decoded by ffmpeg to 16 kHz mono WAV, turned into input
    features by the Whisper processor and passed to ``generate``.

    Returns SRT-formatted text when *subtitle* is true, otherwise the plain
    transcription.

    Raises:
        Exception: whatever ffmpeg raised when the audio could not be decoded
            (previously this was printed and the function then failed later
            on an empty buffer).
    """
    model_name = f"openai/whisper-{model}"
    logger.debug(f"Using model: {model_name} with language setting: {language}")

    # Load the Whisper model and processor
    whisper_model = WhisperForConditionalGeneration.from_pretrained(model_name, low_cpu_mem_usage=True)
    processor = WhisperProcessor.from_pretrained(model_name)

    # Decode with ffmpeg. Whisper's feature extractor expects 16 kHz audio, so
    # resample and downmix here rather than feeding it the source rate.
    try:
        wav_bytes, _ = (
            ffmpeg.input('pipe:0')
            .output('pipe:1', format='wav', ac=1, ar=16000)
            .run(input=audio_bytes, capture_stdout=True, capture_stderr=True)
        )
    except Exception as e:
        logger.error(f"ffmpeg failed to decode audio: {e}")
        raise

    # Now let's load it using torchaudio
    waveform, sampling_rate = torchaudio.load(BytesIO(wav_bytes))
    waveform = waveform.mean(dim=0, keepdim=True)  # Convert to mono

    # Process the audio to get input features for the model
    inputs = processor(waveform.squeeze().numpy(), sampling_rate=sampling_rate, return_tensors="pt", padding=True)

    # Generate transcription with the model. The processor's output key is
    # "input_features" ("input_tensors" does not exist).
    with torch.no_grad():
        outputs = whisper_model.generate(inputs["input_features"], return_segments=True)

    # NOTE(review): `language` and `task` are accepted but not forwarded to
    # generate(); confirm whether they should be.
    # NOTE(review): the shape of `outputs` under return_segments=True (and
    # whether it exposes `.segments` items with "start"/"end"/"text") depends
    # on the installed transformers version -- TODO confirm.

    # Decode the generated tokens to text
    transcription = processor.batch_decode(outputs, skip_special_tokens=True)[0]

    if not subtitle:
        return transcription.strip()

    cues = []
    for sequence_number, segment in enumerate(outputs.segments, start=1):
        start_time = faster_whisper_seconds_to_srt_time(segment["start"])
        end_time = faster_whisper_seconds_to_srt_time(segment["end"])
        text = segment["text"].strip().replace("\n", " ").replace("\r", "")  # Clean up text
        cues.append(f"{sequence_number}\n{start_time} --> {end_time}\n{text}\n\n")
    return "".join(cues).strip().replace("\r", "")  # Remove any remaining carriage return characters
|
||||
|
||||
|
||||
def faster_whisper_transcribe(whisper, audio_io, language, task):
    """Run ``whisper.transcribe`` and return ``(segments, info)``.

    ``segments`` is returned as a fully materialized list. faster-whisper
    yields segments lazily, so decoding only happens while they are iterated;
    consuming them here keeps that work inside this (blocking) helper, which
    callers run in a thread pool.
    """
    segments, info = whisper.transcribe(audio=audio_io, language=language, task=task, beam_size=5)
    return list(segments), info
|
||||
|
||||
async def faster_whisper_cpu(audio_io: BytesIO, subtitle: bool = False, model: str = "small", language: str = None, task: str = "transcribe") -> str:
    """Transcribe *audio_io* with faster-whisper on the CPU (int8).

    Returns SRT-formatted cues when *subtitle* is true, otherwise the segment
    texts joined by single spaces.
    """
    logger.debug(f"Using model: {model} with language setting: {language}")
    engine = WhisperModel(model, device="cpu", compute_type="int8")

    # The blocking transcribe call goes through the thread pool
    segments, info = await run_in_threadpool(faster_whisper_transcribe, engine, audio_io, language, task)
    logger.debug(f"Detected language {info.language} with probability {info.language_probability}%")

    if not subtitle:
        spoken = "".join(f"{segment.text.strip()} " for segment in segments)
        return spoken.strip()

    cues = []
    for cue_number, segment in enumerate(segments, start=1):
        cue_start = faster_whisper_seconds_to_srt_time(segment.start)
        cue_end = faster_whisper_seconds_to_srt_time(segment.end)
        cue_text = segment.text.strip().replace("\n", " ").replace("\r", "")  # single-line cue text
        cues.append(f"{cue_number}\n{cue_start} --> {cue_end}\n{cue_text}\n\n")
    return "".join(cues).strip().replace("\r", "")  # no carriage returns in the final SRT
|
||||
|
||||
def faster_whisper_seconds_to_srt_time(seconds: float) -> str:
    """Convert seconds to SRT time format HH:MM:SS,mmm.

    The value is rounded to the nearest millisecond first and then split with
    integer arithmetic, so float representation error cannot drop a
    millisecond (e.g. 1.001 s -> ``00:00:01,001``). Negative input is clamped
    to zero.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, milliseconds = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
|
||||
|
||||
@asr.post("/asr")
@asr.post("/asr/whisper")
@asr.post("/v1/audio/transcriptions", response_class=FileResponse)
async def faster_whisper_mps_endpoint(
    file: Optional[UploadFile] = Form(None),
    audio_file: Optional[UploadFile] = Form(None),
    subtitle: bool = Form(False),
    model: str = Form(None),
    output: str = "text",
    task: str = "transcribe",
    language: str = None,
    word_timestamps: bool = False,
    encode: bool = True
) -> FileResponse:
    """Transcribe an uploaded audio file via ``faster_whisper_mps``.

    Either ``file`` or ``audio_file`` must be supplied (400 otherwise). An
    unknown ``model`` falls back to "small". With ``subtitle`` the response is
    an ``output.srt`` attachment; otherwise it is JSON ``{"text": ...}``.
    """
    if not file and not audio_file:
        raise HTTPException(status_code=400, detail="Either 'file' or 'audio_file' must be provided.")
    used_file = file or audio_file
    model = model if model in WHISPER_CPP_MODELS else "small"
    logger.debug(f"Received request with model: {model} and language: {language}")
    content = await used_file.read()
    output_content = await faster_whisper_mps(content, subtitle, model, language, task)  # Pass language to whisper_mps

    if subtitle:
        # Serve the subtitles straight from memory. Writing them to a fixed
        # "output.srt" in the working directory let concurrent requests
        # overwrite each other's files.
        return Response(
            content=output_content,
            media_type='application/x-subrip',
            headers={"Content-Disposition": 'attachment; filename="output.srt"'},
        )

    logger.debug(output_content)
    return JSONResponse(content={"text": output_content})
|
||||
|
||||
@asr.options("/asr")
@asr.options("/asr/whisper")
@asr.options("/v1/audio/transcriptions", tags=["transcriptions"])
async def faster_whisper_options_for_mps():
    """Answer CORS preflight requests for the transcription routes with an empty 204."""
    allowed_methods = ", ".join(("POST", "OPTIONS"))
    allowed_headers = ", ".join(("Content-Type", "Authorization"))
    one_day_seconds = str(24 * 60 * 60)

    cors_headers = {}
    cors_headers["Access-Control-Allow-Origin"] = "*"  # any origin
    cors_headers["Access-Control-Allow-Methods"] = allowed_methods
    cors_headers["Access-Control-Allow-Headers"] = allowed_headers
    cors_headers["Access-Control-Max-Age"] = one_day_seconds  # cache preflight for 24 hours

    return Response(headers=cors_headers, status_code=status.HTTP_204_NO_CONTENT)
|
||||
|
||||
@asr.post("/asr2")
@asr.post("/asr/whisper2")
@asr.post("/v1/audio/transcriptions2", response_class=FileResponse)
async def faster_whisper_endpoint2(
    file: Optional[UploadFile] = Form(None),
    audio_file: Optional[UploadFile] = Form(None),
    subtitle: bool = Form(False),
    model: str = Form(None),
    output: str = "text",
    task: str = "transcribe",
    language: str = None,
    word_timestamps: bool = False,
    encode: bool = True
) -> FileResponse:
    """Transcribe an uploaded audio file with ``faster_whisper_cpu``.

    The upload is read fully into memory and handed to the transcriber as a
    ``BytesIO`` stream.

    Args:
        file / audio_file: The upload; either field name is accepted and
            ``file`` wins when both are present.
        subtitle: When true the result is returned as an SRT download,
            otherwise as JSON ``{"text": ...}``.
        model: Requested model name; anything not in ``WHISPER_CPP_MODELS``
            falls back to ``"small"``.
        task, language: Forwarded to ``faster_whisper_cpu``.
        output, word_timestamps, encode: Accepted but not used by this handler.

    Raises:
        HTTPException: 400 when neither upload field is provided.
    """
    import tempfile

    if not file and not audio_file:
        raise HTTPException(status_code=400, detail="Either 'file' or 'audio_file' must be provided.")

    used_file = file or audio_file
    model = model if model in WHISPER_CPP_MODELS else "small"
    logger.debug(f"Received request with model: {model} and language: {language}")

    content = await used_file.read()
    audio_io = BytesIO(content)
    output_content = await faster_whisper_cpu(audio_io, subtitle, model, language, task)

    if not subtitle:
        logger.debug(output_content)
        return JSONResponse(content={"text": output_content})

    # Each request gets its own file: a single shared "output.srt" in the
    # working directory lets concurrent requests overwrite each other's
    # subtitles before they are streamed back.
    # NOTE(review): the temp file is not removed after the response is sent;
    # consider attaching a background cleanup task.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".srt", encoding="utf-8", delete=False
    ) as srt_file:
        srt_file.write(output_content)

    return FileResponse(srt_file.name, media_type='application/x-subrip', filename="output.srt")
|
||||
|
||||
@asr.options("/asr2")
@asr.options("/asr/whisper2")
@asr.options("/v1/audio/transcriptions2", tags=["transcriptions"])
async def faster_whisper_options_2():
    """Answer CORS preflight requests for the ``*2`` transcription routes with an empty 204."""
    cors_pairs = (
        ("Access-Control-Allow-Origin", "*"),  # any origin
        ("Access-Control-Allow-Methods", "POST, OPTIONS"),
        ("Access-Control-Allow-Headers", "Content-Type, Authorization"),
        ("Access-Control-Max-Age", "86400"),  # 24 hours
    )
    preflight = Response(status_code=status.HTTP_204_NO_CONTENT, headers=dict(cors_pairs))
    return preflight
|
102
tests/asr_lightning.py
Normal file
102
tests/asr_lightning.py
Normal file
|
@ -0,0 +1,102 @@
|
|||
from ffmpeg import input as ffmpeg_input
|
||||
from ffmpeg import output as ffmpeg_output
|
||||
from ffmpeg import run as ffmpeg_run
|
||||
|
||||
import numpy as np
|
||||
import os
|
||||
import soundfile as sf
|
||||
from fastapi import APIRouter, HTTPException, UploadFile, Form, status, Response
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
from io import BytesIO
|
||||
from typing import Optional
|
||||
from whisperplus.pipelines.lightning_whisper_mlx import LightningWhisperMLX
|
||||
|
||||
## INITIALIZATIONS ##
# Router that the transcription endpoints in this module register on; the
# tags are attached to every route declared through it.
asr = APIRouter(tags=["trusted", "private"])
# Model names a client may request. The POST endpoint in this module replaces
# any other value with "distil-large-v3" (which is itself not in this set).
WHISPER_CPP_MODELS = {"small", "base", "base-en", "tiny", "medium", "medium-en", "large", "large-v2", "large-v3"}
|
||||
|
||||
async def lightningmlx_whisper(audio_bytes: bytes, subtitle: bool = False, model: str = "distil-large-v3", language: str = None, task: str = "transcribe") -> str:
    """Transcribe raw audio bytes with LightningWhisperMLX.

    The bytes are written to a scratch file, converted to WAV with ffmpeg,
    decoded with ``soundfile`` and passed to the model.

    Args:
        audio_bytes: Encoded audio in any container ffmpeg can read.
        subtitle: When true, return SRT-formatted text built from the model's
            segments; otherwise return the plain transcript.
        model: Model name handed to ``LightningWhisperMLX``.
        language: Language hint forwarded to the model.
        task: Accepted for signature parity; not used by this function.

    Returns:
        The transcript, or the SRT document when ``subtitle`` is true.
    """
    import tempfile

    whisper = LightningWhisperMLX(model=model, batch_size=12, quant=None)

    # Work in a private scratch directory: fixed names in the working
    # directory collide between concurrent requests and are left behind when
    # ffmpeg or decoding fails. The directory is removed on every exit path.
    with tempfile.TemporaryDirectory() as workdir:
        input_file = os.path.join(workdir, 'input_file')
        output_file = os.path.join(workdir, 'output_file.wav')

        with open(input_file, 'wb') as f:
            f.write(audio_bytes)

        # Convert audio file to .wav format
        ffmpeg_input(input_file).output(output_file).run()

        # Load the wav file fully into memory so the scratch files can go away
        audio, _samplerate = sf.read(output_file)

    output = whisper.transcribe(audio, language)["text"]

    if not subtitle:
        return output.strip()

    segments = whisper.get_segments(audio, language)
    entries = []
    for sequence_number, segment in enumerate(segments, start=1):
        start_time = lightningmlx_seconds_to_srt_time(segment["start"])
        end_time = lightningmlx_seconds_to_srt_time(segment["end"])
        # Subtitle text must stay on one line and carry no carriage returns
        text = segment["text"].strip().replace("\n", " ").replace("\r", "")
        entries.append(f"{sequence_number}\n{start_time} --> {end_time}\n{text}\n\n")

    return "".join(entries).strip().replace("\r", "")
|
||||
|
||||
|
||||
def lightningmlx_seconds_to_srt_time(seconds: float) -> str:
    """Convert an offset in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded to the nearest millisecond *before* it is split into
    fields. Truncating the fractional remainder instead loses a millisecond to
    binary floating-point error (``1.001`` would render as ``00:00:01,000``).

    Args:
        seconds: Offset from the start of the audio, in seconds. Negative
            values are clamped to zero, since SRT has no negative timestamps.

    Returns:
        The formatted timestamp, with hours zero-padded to at least two digits.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    whole_seconds, milliseconds = divmod(total_ms, 1000)
    whole_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
|
||||
|
||||
@asr.post("/asr")
@asr.post("/asr/whisper")
@asr.post("/v1/audio/transcriptions", response_class=FileResponse)
async def lightningmlx_whisper_endpoint(
    file: Optional[UploadFile] = Form(None),
    audio_file: Optional[UploadFile] = Form(None),
    subtitle: bool = Form(False),
    model: str = Form(None),
    output: str = "text",
    task: str = "transcribe",
    language: str = None,
    word_timestamps: bool = False,
    encode: bool = True
) -> FileResponse:
    """Transcribe an uploaded audio file with ``lightningmlx_whisper``.

    Args:
        file / audio_file: The upload; either field name is accepted and
            ``file`` wins when both are present.
        subtitle: When true the result is returned as an SRT download,
            otherwise as JSON ``{"text": ...}``.
        model: Requested model name; anything not in ``WHISPER_CPP_MODELS``
            falls back to ``"distil-large-v3"``.
        task, language: Forwarded to ``lightningmlx_whisper``.
        output, word_timestamps, encode: Accepted but not used by this handler.

    Raises:
        HTTPException: 400 when neither upload field is provided.
    """
    import logging
    import tempfile

    # This module never imports or defines a logger, so the bare `logger`
    # name raised NameError on every request; bind one locally.
    logger = logging.getLogger(__name__)

    if not file and not audio_file:
        raise HTTPException(status_code=400, detail="Either 'file' or 'audio_file' must be provided.")

    used_file = file or audio_file
    model = model if model in WHISPER_CPP_MODELS else "distil-large-v3"
    logger.debug(f"Received request with model: {model} and language: {language}")

    content = await used_file.read()
    output_content = await lightningmlx_whisper(content, subtitle, model, language, task)

    if not subtitle:
        logger.debug(output_content)
        return JSONResponse(content={"text": output_content})

    # Each request gets its own file: a single shared "output.srt" in the
    # working directory lets concurrent requests overwrite each other's
    # subtitles before they are streamed back.
    # NOTE(review): the temp file is not removed after the response is sent;
    # consider attaching a background cleanup task.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".srt", encoding="utf-8", delete=False
    ) as srt_file:
        srt_file.write(output_content)

    return FileResponse(srt_file.name, media_type='application/x-subrip', filename="output.srt")
|
||||
|
||||
@asr.options("/asr")
@asr.options("/asr/whisper")
@asr.options("/v1/audio/transcriptions", tags=["transcriptions"])
async def lightningmlx_whisper_options():
    """Answer CORS preflight requests for the transcription routes with an empty 204."""
    header_names = (
        "Access-Control-Allow-Origin",
        "Access-Control-Allow-Methods",
        "Access-Control-Allow-Headers",
        "Access-Control-Max-Age",
    )
    header_values = (
        "*",  # any origin
        "POST, OPTIONS",
        "Content-Type, Authorization",
        "86400",  # 24 hours
    )
    cors_headers = dict(zip(header_names, header_values))
    return Response(status_code=status.HTTP_204_NO_CONTENT, headers=cors_headers)
|
||||
|
117
tests/asr_wp.py
Normal file
117
tests/asr_wp.py
Normal file
|
@ -0,0 +1,117 @@
|
|||
from fastapi import APIRouter, UploadFile, Form, HTTPException, File
|
||||
from fastapi.responses import JSONResponse, FileResponse
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
from whisperplus.pipelines import mlx_whisper
|
||||
from youtube_dl import YoutubeDL
|
||||
import subprocess
|
||||
import os
|
||||
import uuid
|
||||
import shutil
|
||||
import time
|
||||
from typing import Optional
|
||||
from threading import Thread
|
||||
from sijapi import ASR_DIR, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL
|
||||
|
||||
# Router that the /asr endpoint in this module registers on; the tags are
# attached to every route declared through it.
asr = APIRouter(tags=["trusted", "private"])

# Known model identifiers: short names plus one full MLX repository id.
# NOTE(review): nothing in this module's visible code reads this set — confirm
# whether model validation was intended.
WHISPER_CPP_MODELS = {"small", "base", "base-en", "tiny", "medium", "medium-en", "large", "large-v2", "large-v3", "mlx-community/whisper-large-v3-mlx"}
|
||||
|
||||
class TranscribedText(BaseModel):
    """Pydantic model wrapping a single transcript string.

    NOTE(review): not referenced by the endpoints visible in this module.
    """
    # The transcript text (required).
    text: str
|
||||
|
||||
def wp_convert_to_mp3(file_path: str):
    """Transcode ``file_path`` into a new, uniquely named MP3 under ``ASR_DIR``.

    Runs ``ffmpeg -y -i <file_path> <target>`` and returns the target path.
    A non-zero ffmpeg exit raises ``subprocess.CalledProcessError``.
    """
    target_name = f"{uuid.uuid4()}.mp3"
    mp3_file_path = os.path.join(ASR_DIR, target_name)
    ffmpeg_command = ["ffmpeg", "-y", "-i", file_path, mp3_file_path]
    subprocess.run(ffmpeg_command, check=True)
    return mp3_file_path
|
||||
|
||||
def wp_download_from_youtube(url: str):
    """Download the audio track of ``url`` as an MP3 under ``ASR_DIR``.

    Args:
        url: Media URL. The endpoint passes a pydantic ``HttpUrl``, which is
            not guaranteed to be a ``str`` instance, so it is coerced before
            being handed to youtube_dl.

    Returns:
        The path given to youtube_dl as its output template.
    """
    temp_file = os.path.join(ASR_DIR, f"{uuid.uuid4()}.mp3")
    ytdl_opts = {
        'outtmpl': temp_file,
        'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '192'}],
        'nooverwrites': True
    }
    with YoutubeDL(ytdl_opts) as ydl:
        ydl.download([str(url)])
    return temp_file
|
||||
|
||||
def wp_format_srt_timestamp(seconds: float):
    """Render an offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    The offset is rounded to whole milliseconds first and then broken down
    into hours, minutes, seconds and milliseconds.
    """
    total_ms = round(seconds * 1000.0)
    hours, rest_ms = divmod(total_ms, 3_600_000)
    minutes, rest_ms = divmod(rest_ms, 60_000)
    secs, millis = divmod(rest_ms, 1_000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
|
||||
|
||||
def wp_write_srt(segments: list, output_file: str):
    """Write ``segments`` to ``output_file`` as an SRT subtitle document.

    Args:
        segments: Sequence of mappings with ``'start'`` and ``'end'`` offsets
            in seconds and a ``'text'`` string.
        output_file: Destination path; overwritten if it exists.

    The file is written as UTF-8 explicitly so non-ASCII transcripts do not
    depend on the process locale, and each cue's text is stripped of
    surrounding whitespace.
    """
    with open(output_file, 'w', encoding='utf-8') as f:
        for i, segment in enumerate(segments, start=1):
            start = wp_format_srt_timestamp(segment['start'])
            end = wp_format_srt_timestamp(segment['end'])
            text = segment['text'].strip()
            f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
|
||||
|
||||
def wp_cleanup_files():
    """Periodically delete stale files from ``ASR_DIR``; never returns.

    Every ``GARBAGE_COLLECTION_INTERVAL`` seconds, each regular file in
    ``ASR_DIR`` whose modification time is more than ``GARBAGE_TTL`` seconds
    old is removed. Intended to run on a background thread.
    """
    while True:
        now = time.time()
        try:
            filenames = os.listdir(ASR_DIR)
        except OSError:
            # Directory missing or unreadable right now; try again next cycle
            # rather than letting the exception end the janitor thread.
            filenames = []

        for filename in filenames:
            file_path = os.path.join(ASR_DIR, filename)
            try:
                if os.path.isfile(file_path):
                    file_age = now - os.path.getmtime(file_path)
                    if file_age > GARBAGE_TTL:
                        os.remove(file_path)
            except OSError:
                # Best-effort cleanup: the file may have been removed or
                # locked between the listing and this point.
                continue

        time.sleep(GARBAGE_COLLECTION_INTERVAL)
|
||||
|
||||
@asr.post("/asr")
async def wp_asr_endpoint(
    url: Optional[HttpUrl] = None,
    file: Optional[UploadFile] = File(None),
    output: str = Form('txt'),
    language: Optional[str] = Form(None),
    task: str = Form('transcribe'),
    model: str = Form('large-v3')
):
    """Transcribe a remote URL or an uploaded file and shape the response by ``output``.

    ``'srt'`` returns the generated subtitle file as a download, ``'json'``
    returns the result as a JSON response, and any other value returns the
    result of ``wp_do_asr`` as-is.
    """
    asr_result = await wp_do_asr(url, file, output, language, task, model)

    if output == 'srt':
        # For SRT, wp_do_asr hands back the path of the file it wrote
        return FileResponse(asr_result, media_type='application/x-subrip', filename='subtitle.srt')
    if output == 'json':
        return JSONResponse(content=asr_result)
    return asr_result
|
||||
|
||||
async def wp_do_asr(url: str = None, file: Optional[UploadFile] = None, output: str = 'txt', language: str = 'en', task: str = 'transcribe', model: str = 'small'):
    """Fetch or store the audio, run ``mlx_whisper`` on it, and format the result.

    Args:
        url: Remote media URL; takes precedence over ``file``.
        file: Uploaded audio, converted to MP3 before transcription.
        output: ``'srt'`` (returns the path of a written SRT file), ``'json'``
            (returns ``{"text", "segments"}``) or anything else (plain text).
        language: Optional language hint; omitted from the call when falsy.
        task: Forwarded to ``mlx_whisper.transcribe``.
        model: Short model name (expanded to
            ``mlx-community/whisper-<model>-mlx``) or a full repository id
            containing ``/``, which is used unchanged.

    Raises:
        HTTPException: 400 when neither ``url`` nor ``file`` is given.
    """
    if url:
        audio_path = wp_download_from_youtube(url)
    elif file:
        # The client controls the filename; keep only its final path
        # component so it cannot steer the write outside ASR_DIR.
        safe_name = os.path.basename(file.filename or "upload")
        temp_file = os.path.join(ASR_DIR, f"{uuid.uuid4()}_{safe_name}")
        with open(temp_file, 'wb') as out:
            out.write(await file.read())
        audio_path = wp_convert_to_mp3(temp_file)
    else:
        raise HTTPException(status_code=400, detail="Either URL or file must be provided")

    # A full repo id must not be wrapped a second time into
    # "mlx-community/whisper-<repo>-mlx".
    repo = model if "/" in model else f"mlx-community/whisper-{model}-mlx"
    transcribe_args = {
        'path_or_hf_repo': repo,
        'task': task
    }
    if language:
        transcribe_args['language'] = language

    result = mlx_whisper.transcribe(audio_path, **transcribe_args)

    if output == 'srt':
        srt_output_path = os.path.join(ASR_DIR, f"{uuid.uuid4()}.srt")
        wp_write_srt(result['segments'], srt_output_path)
        return srt_output_path
    elif output == 'json':
        return {"text": result.get("text"), "segments": result.get("segments")}
    else:
        return result.get("text")
|
||||
|
||||
# Start the cleanup thread at import time. It is a daemon thread, so it does
# not keep the process alive at shutdown; wp_cleanup_files loops forever.
cleanup_thread = Thread(target=wp_cleanup_files, daemon=True)
cleanup_thread.start()
|
416
tests/locate copy.py
Normal file
416
tests/locate copy.py
Normal file
|
@ -0,0 +1,416 @@
|
|||
from fastapi import APIRouter, HTTPException, Query, Response
|
||||
from fastapi.responses import JSONResponse, HTMLResponse
|
||||
import pytz
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import HTMLResponse
|
||||
import folium
|
||||
import os
|
||||
import asyncpg
|
||||
import time as timers
|
||||
import json
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Any, Dict, List
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta, time
|
||||
import pandas
|
||||
import math
|
||||
import pytz
|
||||
import httpx
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Union
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
import psycopg2
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from fastapi.responses import PlainTextResponse
|
||||
from psycopg2.extras import RealDictCursor
|
||||
import pytz
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, HOME_DIR, DATA_DIR, VISUALCROSSING_API_KEY, LOCATIONS_CSV, DB, BASE_URL, GLOBAL_API_KEY, TS_IP, DB, DB_HOST, DB_PORT, DB_USER, DB_PASS, DB_SSH, DB_SSH_USER, DB_SSH_PASS
|
||||
from sijapi.utilities import db_localized
|
||||
from sijapi.routers.weather import get_weather
|
||||
|
||||
# Router on which this module's /map, /locate and /upload_tracker routes are registered.
locate = APIRouter()
|
||||
|
||||
|
||||
class Location(BaseModel):
    """A geographic position with optional address details and a timestamp.

    Every field is optional and defaults to ``None``, so an empty
    ``Location()`` is valid (``geocode_location`` returns one when nothing
    matches).
    """
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    zip: Optional[str] = None
    street: Optional[str] = None
    city: Optional[str] = None
    state: Optional[str] = None
    elevation: Optional[float] = None
    # Declared as a datetime, so pydantic hands consumers a datetime object
    # (not the raw request string).
    date: Optional[datetime] = None
    country: Optional[str] = None
|
||||
|
||||
|
||||
def map_location_data(location_data):
    """Turn a positional ``locations`` row into a dict keyed by column name.

    The row is indexed positionally in the order id, datetime, latitude,
    longitude, city, state, zip, street, elevation; a shorter row raises
    ``IndexError``.
    """
    column_order = (
        'id', 'datetime', 'latitude', 'longitude',
        'city', 'state', 'zip', 'street', 'elevation',
    )
    return {column: location_data[position] for position, column in enumerate(column_order)}
|
||||
|
||||
|
||||
|
||||
async def geocode_location(zip_code: Optional[str] = None, latitude: Optional[float] = None, longitude: Optional[float] = None, city: Optional[str] = None, state: Optional[str] = None, country_code: str = 'US') -> Location:
    """Look up the best-matching row in ``Locations`` for the given hints.

    Any combination of coordinates, ZIP code, and city+state may be supplied;
    the resulting conditions are OR-ed together and the closest row (by
    distance to the coordinates, when given) is returned.

    Placeholders are numbered from the arguments actually bound. Hard-coded
    ``$1``..``$7`` only line up when every hint is supplied at once.

    Returns:
        A populated ``Location``, or an empty ``Location()`` when no row matches.

    Raises:
        Exception: A generic error for any failure; the cause is logged and
            chained.
    """
    params = []

    def _bind(value) -> str:
        """Register ``value`` as a query argument and return its ``$n`` placeholder."""
        params.append(value)
        return f"${len(params)}"

    try:
        conditions = []

        if latitude is not None and longitude is not None:
            lon_ph = _bind(longitude)
            lat_ph = _bind(latitude)
            point = f"ST_SetSRID(ST_MakePoint({lon_ph}, {lat_ph}), 4326)"
            distance_expr = f"ST_Distance(geom, {point})"
            # NOTE(review): 50000 is metres only if `geom` is a geography
            # column; for SRID-4326 geometry the unit is degrees — confirm.
            conditions.append(f"ST_DWithin(geom, {point}, 50000)")
        else:
            # No reference point: there is nothing to measure distance to.
            distance_expr = "NULL::double precision"

        if zip_code:
            conditions.append(f"(zip = {_bind(zip_code)} AND country = {_bind(country_code)})")

        if city and state:
            conditions.append(
                f"(city ILIKE {_bind(city)} AND state ILIKE {_bind(state)} AND country = {_bind(country_code)})"
            )

        query = (
            "SELECT id, street, city, state, country, latitude, longitude, zip, elevation, datetime, date, "
            f"{distance_expr} AS distance FROM Locations"
        )
        if conditions:
            query += " WHERE " + " OR ".join(conditions)
        query += " ORDER BY distance LIMIT 1;"

        DEBUG(f"Executing query: {query} with params: {params}")

        conn = db_localized()
        try:
            result = await conn.fetchrow(query, *params)
        finally:
            # Release the connection even when the query fails
            await conn.close()

        if not result:
            DEBUG("No location found with provided parameters.")
            return Location()

        # `distance` is only logged: Location declares no such field.
        location_info = Location(
            latitude=result['latitude'],
            longitude=result['longitude'],
            zip=result['zip'],
            street=result.get('street', ''),
            city=result['city'],
            state=result['state'],
            country=result['country'],
            elevation=result.get('elevation', 0)
        )
        DEBUG(f"Found location: {location_info}")
        return location_info

    except Exception as e:
        ERR(f"Error occurred: {e}")
        raise Exception("An error occurred while processing your request") from e
|
||||
|
||||
|
||||
|
||||
def post_location(date: datetime, location):
    """Insert or update the ``locations`` row for ``location`` at ``date``.

    ``date`` must be a naive datetime; it is localized to America/Los_Angeles
    and stored as an ISO-8601 string. A row with the same datetime, latitude
    and longitude is updated in place, otherwise a new row is inserted.

    NOTE(review): the statements use ``?`` (qmark) placeholders — confirm the
    connection returned by ``db_localized()`` accepts that paramstyle.

    Returns:
        A dict echoing the stored values.
    """
    pacific = pytz.timezone('America/Los_Angeles')
    datetime_entry = pacific.localize(date).isoformat()

    conn = db_localized()
    try:
        cursor = conn.cursor()

        # Check if an entry exists
        cursor.execute('''
            SELECT id FROM locations
            WHERE datetime = ? AND latitude = ? AND longitude = ?
        ''', (datetime_entry, location.latitude, location.longitude))
        existing_entry = cursor.fetchone()

        if existing_entry:
            # Update existing location
            cursor.execute('''
                UPDATE locations
                SET city = ?, state = ?, zip = ?, street = ?, elevation = ?
                WHERE id = ?
            ''', (location.city, location.state, location.zip, location.street, location.elevation, existing_entry[0]))
        else:
            # Insert new location into database
            cursor.execute('''
                INSERT INTO locations (datetime, latitude, longitude, city, state, zip, street, elevation)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                datetime_entry,
                location.latitude,
                location.longitude,
                location.city,
                location.state,
                location.zip,
                location.street,
                location.elevation,
            ))

        conn.commit()
    finally:
        # Close the connection on failure as well as success
        conn.close()

    return {
        'datetime': datetime_entry,
        'latitude': location.latitude,
        'longitude': location.longitude,
        'city': location.city,
        'state': location.state,
        'zip': location.zip,
        'street': location.street,
        'elevation': location.elevation
    }
|
||||
|
||||
|
||||
|
||||
def fetch_locations(start: datetime, end: datetime) -> List[dict]:
    """Return every ``locations`` row whose datetime lies between two dates.

    Both bounds are reduced to ``YYYY-MM-DD`` before querying.

    NOTE(review): because the upper bound carries no time component, rows
    later on the ``end`` day itself may fall outside the BETWEEN range —
    confirm whether the end day should be included.
    """
    start_date = start.strftime("%Y-%m-%d")
    end_date = end.strftime("%Y-%m-%d")

    query = '''
        SELECT * FROM locations
        WHERE datetime BETWEEN ? AND ?
    '''

    conn = db_localized()
    try:
        cursor = conn.cursor()
        cursor.execute(query, (start_date, end_date))
        rows = cursor.fetchall()
    finally:
        # Do not leak the connection when the query raises
        conn.close()

    return [dict(row) for row in rows]
|
||||
|
||||
@locate.get("/map/start_date={start_date_str}&end_date={end_date_str}", response_class=HTMLResponse)
async def generate_map_endpoint(start_date_str: str, end_date_str: str):
    """Render the location map for an ISO-formatted date range taken from the path.

    Responds 400 when either bound is not a valid ISO date/datetime.
    """
    try:
        range_start, range_end = (
            datetime.fromisoformat(raw) for raw in (start_date_str, end_date_str)
        )
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid date format")

    rendered = await generate_map(range_start, range_end)
    return HTMLResponse(content=rendered)
|
||||
|
||||
|
||||
@locate.get("/map", response_class=HTMLResponse)
async def generate_alltime_map_endpoint():
    """Render the location map for everything from 2023-01-01 up to now."""
    # Both bounds are fixed expressions that cannot raise ValueError, so no
    # "invalid date" handling is needed here.
    start_date = datetime.fromisoformat("2023-01-01")
    end_date = datetime.now()

    html_content = await generate_map(start_date, end_date)
    return HTMLResponse(content=html_content)
|
||||
|
||||
|
||||
async def generate_map(start_date: datetime, end_date: datetime):
    """Build a folium map with one marker per stored location in the range.

    Returns:
        The map as an HTML document string.

    Raises:
        HTTPException: 404 when no locations exist in the range.
    """
    locations = fetch_locations(start_date, end_date)
    if not locations:
        raise HTTPException(status_code=404, detail="No locations found for the given date range")

    # Create a folium map centered around the first location
    map_center = [locations[0]['latitude'], locations[0]['longitude']]
    m = folium.Map(location=map_center, zoom_start=5)

    # Add markers for each location
    for loc in locations:
        folium.Marker(
            location=[loc["latitude"], loc["longitude"]],
            popup=f"{loc['city']}, {loc['state']}<br>Elevation: {loc['elevation']}m<br>DateTime: {loc['datetime']}",
            tooltip=f"{loc['city']}, {loc['state']}"
        ).add_to(m)

    # Render in memory. Saving to a shared "map.html" and reading it back lets
    # concurrent requests clobber each other's output, and the read-back used
    # the locale encoding rather than the UTF-8 the file was written in.
    return m.get_root().render()
|
||||
|
||||
|
||||
@locate.post("/locate")
async def post_locate(locations: Union[Location, List[Location]]):
    """Store one or more locations and fetch weather for each.

    ``Location.date`` is validated as a ``datetime``, so it is handled as one
    here; calling string methods on it raises ``AttributeError``.

    Timestamp rules:
      * no date: the current Pacific time is used;
      * timezone-aware date: converted to Pacific time;
      * naive date at exactly midnight: treated as "date only" and defaulted
        to 14:00. NOTE(review): this also shifts a genuine midnight reading.
    The timestamp is then truncated to whole minutes and made naive, which is
    what ``post_location`` expects.
    """
    pacific = pytz.timezone('America/Los_Angeles')
    responses = []

    if isinstance(locations, Location):
        locations = [locations]

    for location in locations:
        if location.date:
            stamp = location.date
            if stamp.tzinfo is not None:
                stamp = stamp.astimezone(pacific)
            elif stamp.time() == datetime.min.time():
                stamp = stamp.replace(hour=14)  # Default to 2 PM if time is not provided
        else:
            stamp = datetime.now(pacific)

        date_time_str = stamp.strftime("%Y-%m-%d %H:%M")
        date_time = datetime.strptime(date_time_str, "%Y-%m-%d %H:%M")

        location_entry = post_location(date_time, location)
        # Assume fetching weather data is handled separately and similarly needs updating
        weather_data = get_weather(location_entry['latitude'], location_entry['longitude'], date_time_str.split(' ')[0])
        responses.append({"location_data": location_entry, "weather_data": weather_data})

    return {"message": "Locations and weather updated", "results": responses}
|
||||
|
||||
|
||||
@locate.get("/locate/{datetime_str}")
async def get_locate(datetime_str: str):
    """Return the stored location(s) for a ``YYYY-MM-DD HH:MM`` timestamp.

    Responds 404 when ``get_location`` finds nothing and 400 when a
    ``ValueError`` is raised (e.g. the timestamp does not match the format).
    """
    try:
        requested_at = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M")
        matches = get_location(requested_at)
        if matches:
            return matches
        raise HTTPException(status_code=404, detail="No nearby data found for this date and time")
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid datetime format. It must be YYYY-MM-DD HH:MM")
|
||||
|
||||
|
||||
|
||||
def get_location(date: datetime) -> list:
    """Find stored locations for a naive Pacific-time datetime.

    Lookup order:
      1. a row whose datetime equals ``date`` exactly;
      2. if none was found, or ``date`` is exactly midnight (treated as "only
         a date was provided"), every row on that calendar day, newest first;
      3. if still nothing, the single most recent row before ``date``.

    The midnight test inspects the time itself: a substring search for
    ``'00:00'`` in the ISO string matches every top-of-the-hour time
    (``13:00:00``). Rows are de-duplicated by id so an exact match is not
    repeated by the same-day listing.

    Returns:
        A list of dicts as produced by ``map_location_data`` (possibly empty).
    """
    pacific = pytz.timezone('America/Los_Angeles')
    formatted_datetime = pacific.localize(date).isoformat()
    date_only = date.time() == datetime.min.time()

    select_sql = "SELECT id, datetime, latitude, longitude, city, state, zip, street, elevation FROM locations"
    locations = []
    seen_ids = set()

    def _collect(row):
        """Append ``row`` to the results unless its id was already collected."""
        if row[0] in seen_ids:
            return
        seen_ids.add(row[0])
        locations.append(map_location_data(row))

    conn = db_localized()
    try:
        cursor = conn.cursor()

        # First, try to find an exact match
        cursor.execute(select_sql + " WHERE datetime = ?", (formatted_datetime,))
        exact = cursor.fetchone()
        if exact:
            _collect(exact)

        # If no exact match found or only the date was provided, check for records on the same day
        if not locations or date_only:
            start_of_day = pacific.localize(datetime.combine(date.date(), datetime.min.time()))
            end_of_day = start_of_day + timedelta(days=1)
            cursor.execute(
                select_sql + " WHERE datetime >= ? AND datetime < ? ORDER BY datetime DESC",
                (start_of_day.isoformat(), end_of_day.isoformat()),
            )
            for row in cursor.fetchall():
                _collect(row)

        # If no records on the same day, find the most recent before the given datetime
        if not locations:
            cursor.execute(
                select_sql + " WHERE datetime < ? ORDER BY datetime DESC LIMIT 1",
                (formatted_datetime,),
            )
            latest = cursor.fetchone()
            if latest:
                _collect(latest)
    finally:
        # Close the connection on failure as well as success
        conn.close()

    return locations
|
||||
|
||||
|
||||
def parse_custom_datetime(date_str, hour_str):
    """Combine a ``YYYY-MM-DD`` date and an hour into a Pacific-time ISO-8601 string.

    The hour is interpreted as ``HH:00``. Localization uses ``is_dst=None``.
    Any failure is re-raised as ``ValueError`` with the original message
    embedded.
    """
    try:
        naive_moment = datetime.strptime(f"{date_str} {hour_str}:00", "%Y-%m-%d %H:%M")
        pacific_zone = pytz.timezone('America/Los_Angeles')
        return pacific_zone.localize(naive_moment, is_dst=None).isoformat()
    except Exception as e:
        raise ValueError(f"Error parsing datetime: {str(e)}")
|
||||
|
||||
def post_custom_location(date_time_str, location: Location):
    """Insert or update the ``locations`` row for an already-formatted timestamp.

    Args:
        date_time_str: Timestamp string stored verbatim in the ``datetime``
            column.
        location: Values to store.

    A row with the same datetime, latitude and longitude is updated in place,
    otherwise a new row is inserted.

    Returns:
        A dict echoing the stored values.
    """
    datetime_entry = date_time_str  # Use the already formatted datetime string

    conn = db_localized()
    try:
        cursor = conn.cursor()

        cursor.execute('''
            SELECT id FROM locations
            WHERE datetime = ? AND latitude = ? AND longitude = ?
        ''', (datetime_entry, location.latitude, location.longitude))
        existing_entry = cursor.fetchone()

        if existing_entry:
            cursor.execute('''
                UPDATE locations
                SET city = ?, state = ?, zip = ?, street = ?, elevation = ?
                WHERE id = ?
            ''', (location.city, location.state, location.zip, location.street, location.elevation, existing_entry[0]))
        else:
            cursor.execute('''
                INSERT INTO locations (datetime, latitude, longitude, city, state, zip, street, elevation)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                datetime_entry,
                location.latitude,
                location.longitude,
                location.city,
                location.state,
                location.zip,
                location.street,
                location.elevation,
            ))

        conn.commit()
    finally:
        # Close the connection on failure as well as success
        conn.close()

    return {
        'datetime': datetime_entry,
        'latitude': location.latitude,
        'longitude': location.longitude,
        'city': location.city,
        'state': location.state,
        'zip': location.zip,
        'street': location.street,
        'elevation': location.elevation
    }
|
||||
|
||||
@locate.post("/upload_tracker")
async def upload_custom_locations(data: Dict[str, Dict[str, Any]]):
    """Bulk-store locations from a ``{date: {hour: location_fields}}`` mapping.

    Processing stops at the first entry that fails, which is reported as a
    400 naming that entry.
    """
    results = []
    for date, hours in data.items():
        for hour, location in hours.items():
            # Label for error reporting. It must exist before parsing, or a
            # parse failure would hit an unbound (or previous iteration's)
            # `datetime_str` inside the handler and mask the real error.
            entry_label = f"{date} {hour}:00"
            try:
                datetime_str = parse_custom_datetime(date, hour)
                entry_label = datetime_str
                location_input = Location(**location)
                location_entry = post_custom_location(datetime_str, location_input)
                results.append(location_entry)
            except Exception as e:
                raise HTTPException(status_code=400, detail=f"Error processing location for {entry_label}: {str(e)}") from e
    return {"message": "Locations uploaded successfully", "results": results}
|
||||
|
||||
|
||||
|
27
tests/newstest.py
Normal file
27
tests/newstest.py
Normal file
|
@ -0,0 +1,27 @@
|
|||
import newspaper
|
||||
import trafilatura
|
||||
|
||||
# Ad-hoc script: discover articles on a news site with newspaper, then fetch
# and extract each article's main text with trafilatura.
aggregator = []
site = newspaper.build('https://wired.com', language='en')

if not site.articles:
    print("No articles obtained!")

for article in site.articles:
    try:
        downloaded = trafilatura.fetch_url(article.url)
        # handle the case where no meaningful content extracted
        if downloaded is None:
            print("Nothing downloaded.")
            continue
        result = trafilatura.extract(downloaded, include_comments=False)
        if result is not None:
            aggregator.append(result)
            print(f"Article: {result}")
        else:
            print("No result.")
    except Exception as e:
        # One bad article should not abort the whole crawl
        print(f'Failed to process article {article.url} with error {str(e)}')

# Then you can do something with the aggregator list here.
|
297
tests/weather copy.py
Normal file
297
tests/weather copy.py
Normal file
|
@ -0,0 +1,297 @@
|
|||
from fastapi import APIRouter, HTTPException, Query, Response
|
||||
from fastapi.responses import JSONResponse, HTMLResponse
|
||||
import psycopg2
|
||||
import pytz
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import HTMLResponse
|
||||
import folium
|
||||
import os
|
||||
import asyncpg
|
||||
import time as timers
|
||||
import json
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Any, Dict, List
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta, time
|
||||
import pandas
|
||||
import math
|
||||
import pytz
|
||||
import httpx
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Union
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from sshtunnel import SSHTunnelForwarder
|
||||
import psycopg2
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from fastapi.responses import PlainTextResponse
|
||||
from psycopg2.extras import RealDictCursor
|
||||
import pytz
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import HOME_DIR, DATA_DIR, VISUALCROSSING_BASE_URL, VISUALCROSSING_API_KEY, LOCATIONS_CSV, DB, BASE_URL, GLOBAL_API_KEY, TS_IP
|
||||
from sijapi.utilities import get_db_connection, db_localized, haversine
|
||||
|
||||
# Router object for this module's weather routes (none are registered in the visible code).
weather = APIRouter()
|
||||
|
||||
def get_stored_weather(conn, date, hour, zip_code):
    """Fetch the stored hourly and daily weather for a date, hour and ZIP code.

    The hour is normalized to a zero-padded two-digit string before querying.

    Returns:
        The first matching row converted to a dict, or ``None`` when nothing
        is stored.
    """
    hour = f"{int(hour):02d}"  # Ensure hour is zero-padded
    parameters = (date, hour, zip_code)
    query = '''
        SELECT H.*, HW.*, DW.*
        FROM Hours H
        JOIN Days D ON H.day_id = D.id
        JOIN Locations L ON H.location_id = L.id
        LEFT JOIN HourlyWeather HW ON H.hourly_weather_id = HW.id
        LEFT JOIN DailyWeather DW ON DW.day_id = D.id
        WHERE D.date = ? AND H.hour = ? AND L.zip = ?
    '''
    DEBUG(f"Querying stored weather data with: {query} and parameters: {parameters}")

    cursor = conn.cursor()
    cursor.execute(query, parameters)
    row = cursor.fetchone()

    if not row:
        DEBUG(f"No stored weather found for {date} {hour}:00 at zip {zip_code}")
        return None

    DEBUG(f"Stored weather found for {date} {hour}:00 at zip {zip_code}")
    return dict(row)
|
||||
|
||||
|
||||
def get_weather(latitude: float, longitude: float, date: datetime = None):
    """Return daily weather for a position and date, refreshing from Visual Crossing when stale.

    Stored data is reused unless the requested date is today or later, the
    stored record was last updated before the requested date, the stored
    record's position is more than 8 (haversine units) away, or its
    ``last_updated`` value is missing.

    Raises:
        HTTPException: with the upstream status code when Visual Crossing
            answers non-200; 500 when the request itself fails or the
            response cannot be parsed, stored, or read back.
    """
    date = date if date else datetime.now()
    request_date_str = date.strftime("%Y-%m-%d")
    DEBUG(f"Called get_weather with lat: {latitude}, lon: {longitude}, datetime: {request_date_str}")

    daily_weather_data = get_weather_data(latitude, longitude, date)
    DEBUG(f"daily_weather_data: {daily_weather_data}")
    date_now = datetime.now().date()
    date_requested = date.date()
    fetch_new_data = False

    if daily_weather_data:
        last_updated = daily_weather_data.get('last_updated')
        # `last_updated` may be absent, or may already be a datetime
        # depending on the driver; strptime on either would raise.
        if isinstance(last_updated, datetime):
            date_updated = last_updated
        elif last_updated:
            date_updated = datetime.strptime(last_updated, "%Y-%m-%d %H:%M:%S")
        else:
            date_updated = None

        if date_updated:
            DEBUG(f"DEBUG: last updated {date_updated}")
        else:
            ERR(f"Take a look here in get_weather...")

        DEBUG(f"LAT: {latitude}\nLON: {longitude}\nDATA LAT: {daily_weather_data.get('latitude')}\nDATA LON: {daily_weather_data.get('longitude')}\n")
        distance = haversine(latitude, longitude, daily_weather_data.get('latitude'), daily_weather_data.get('longitude'))
        DEBUG(f"haversine distance: {distance}")

        if (date_updated is None or
                date_requested >= date_now or
                date_updated.date() < date_requested or
                distance > 8):
            DEBUG(f"we need new data!")
            fetch_new_data = True
    else:
        fetch_new_data = True

    if not fetch_new_data:
        return daily_weather_data

    url = f"{VISUALCROSSING_BASE_URL}/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"

    # Each failure mode is handled in its own step. A single outer
    # `except Exception` also catches the HTTPExceptions raised below and
    # rewrites them all to a generic 500 "Failed to fetch weather data".
    try:
        response = requests.get(url)
    except Exception as e:
        ERR(f"Exception during API call: {e}")
        raise HTTPException(status_code=500, detail="Failed to fetch weather data")

    if response.status_code != 200:
        ERR(f"Failed to fetch weather data: {response.status_code}, {response.text}")
        raise HTTPException(status_code=response.status_code, detail="Failed to fetch weather data")

    DEBUG(f"Successfully obtained data from VC...")
    try:
        weather_data = response.json()
        DEBUG(f"Calling store_weather_data with {weather_data}")
        store_weather_data(weather_data, date)
        DEBUG(f"New weather data for {request_date_str} stored in database...")
        daily_weather_data = get_weather_data(latitude, longitude, date)
    except Exception as e:
        ERR(f"Problem parsing VC response or storing data: {e}")
        raise HTTPException(status_code=500, detail="Weather data was not properly stored.")

    if daily_weather_data is None:
        raise HTTPException(status_code=500, detail="Weather data was not properly stored.")

    DEBUG(f"Weather data retrieved back from database {daily_weather_data}")
    return daily_weather_data
|
||||
|
||||
def store_weather_data(weather_data, date: datetime):
    """Store one day of fetched weather data (daily summary plus hourly rows).

    Only the first entry of ``weather_data['days']`` is used. It is written
    as a new ``DailyWeather`` row, the ``Days`` row for ``date`` is inserted
    or re-pointed at that new row, and every entry of the day's ``hours``
    list is inserted into ``HourlyWeather``. Everything is committed in a
    single transaction at the end.

    Args:
        weather_data: Parsed JSON payload; must expose ``.get('days')``
            returning a non-empty sequence of dict-like day records.
        date: Day being stored; formatted as ``YYYY-MM-DD`` for the
            ``Days.date`` key.

    Raises:
        Exception: Any error other than ``conn.OperationalError`` is logged
            and re-raised. ``conn.OperationalError`` is logged and
            suppressed, so the caller must verify that the data was stored.
    """
    request_date_str = date.strftime("%Y-%m-%d")
    conn = db_localized()
    cursor = conn.cursor()

    try:
        day_data = weather_data.get('days')[0]

        # 'preciptype' and 'stations' are flattened to CSV strings when they
        # arrive as lists; any other value (including None) becomes "".
        preciptype_csv = ','.join(day_data['preciptype']) if isinstance(day_data.get('preciptype'), list) else ""
        stations_csv = ','.join(day_data['stations']) if isinstance(day_data.get('stations'), list) else ""

        # Order must match the column list of the INSERT below.
        daily_weather_params = (
            day_data.get('sunrise'), day_data.get('sunriseEpoch'),
            day_data.get('sunset'), day_data.get('sunsetEpoch'),
            day_data.get('description'), day_data.get('tempmax'),
            day_data.get('tempmin'), day_data.get('uvindex'),
            day_data.get('winddir'), day_data.get('windspeed'),
            day_data.get('icon'), datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            day_data.get('datetime'), day_data.get('datetimeEpoch'),
            day_data.get('temp'), day_data.get('feelslikemax'),
            day_data.get('feelslikemin'), day_data.get('feelslike'),
            day_data.get('dew'), day_data.get('humidity'),
            day_data.get('precip'), day_data.get('precipprob'),
            day_data.get('precipcover'), preciptype_csv,
            day_data.get('snow'), day_data.get('snowdepth'),
            day_data.get('windgust'), day_data.get('pressure'),
            day_data.get('cloudcover'), day_data.get('visibility'),
            day_data.get('solarradiation'), day_data.get('solarenergy'),
            day_data.get('severerisk', 0), day_data.get('moonphase'),
            day_data.get('conditions'), stations_csv, day_data.get('source')
        )

        daily_weather_query = '''
        INSERT INTO DailyWeather (
            sunrise, sunriseEpoch, sunset, sunsetEpoch, description,
            tempmax, tempmin, uvindex, winddir, windspeed, icon, last_updated,
            datetime, datetimeEpoch, temp, feelslikemax, feelslikemin, feelslike,
            dew, humidity, precip, precipprob, precipcover, preciptype,
            snow, snowdepth, windgust, pressure, cloudcover, visibility,
            solarradiation, solarenergy, severerisk, moonphase, conditions,
            stations, source
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        '''

        cursor.execute(daily_weather_query, daily_weather_params)
        daily_weather_id = cursor.lastrowid

        # Upsert: a date that already exists is re-pointed at the new row.
        cursor.execute('''
        INSERT INTO Days (date, daily_weather_id) VALUES (?, ?)
        ON CONFLICT(date) DO UPDATE SET daily_weather_id = excluded.daily_weather_id
        ''', (request_date_str, daily_weather_id))

        # The statement text is loop-invariant, so it is built once. The
        # per-hour 'stations' value is the day-level CSV (stations_csv),
        # which is exactly what was previously recomputed on every iteration.
        hourly_weather_query = '''
        INSERT INTO HourlyWeather (day_id, datetime, datetimeEpoch, temp, feelslike, humidity, dew, precip, precipprob,
        snow, snowdepth, windgust, windspeed, winddir, pressure, cloudcover, visibility, solarradiation, solarenergy,
        uvindex, severerisk, conditions, icon, stations, source)
        VALUES ((SELECT id FROM Days WHERE date = ?), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        '''

        # A missing or null 'hours' entry simply means no hourly rows.
        for hour_data in day_data.get('hours') or ():
            cursor.execute(hourly_weather_query, (
                request_date_str, hour_data['datetime'], hour_data.get('datetimeEpoch'), hour_data['temp'], hour_data['feelslike'],
                hour_data['humidity'], hour_data['dew'], hour_data['precip'], hour_data['precipprob'], hour_data['snow'],
                hour_data['snowdepth'], hour_data['windgust'], hour_data['windspeed'], hour_data['winddir'], hour_data['pressure'],
                hour_data['cloudcover'], hour_data['visibility'], hour_data['solarradiation'], hour_data['solarenergy'],
                hour_data['uvindex'], hour_data.get('severerisk', 0), hour_data['conditions'], hour_data['icon'], stations_csv,
                hour_data.get('source', '')
            ))

        conn.commit()
    except conn.OperationalError as e:
        # Logged but not re-raised; nothing has been committed at this point.
        ERR(f"SQLite error occurred: {e}")
    except Exception as e:
        ERR(f"Unexpected error occurred: {e}")
        raise
    finally:
        conn.close()
|
||||
|
||||
|
||||
def get_weather_data(latitude: float, longitude: float, date: datetime):
    """Load the stored daily weather row and its hourly rows for one date.

    Args:
        latitude: Accepted for interface compatibility; not used by the query.
        longitude: Accepted for interface compatibility; not used by the query.
        date: Day to look up, as a ``datetime`` or an ISO-format string.

    Returns:
        ``None`` when no ``Days`` row exists for the date. Otherwise a dict
        of the joined ``DailyWeather`` columns plus an ``'hours'`` entry that
        maps each hourly ``datetime`` value to a dict of selected fields.

    NOTE(review): the SELECT aliases ``D.date`` as ``last_updated`` next to
    ``DW.*``; if ``DailyWeather`` also has a ``last_updated`` column the
    result carries two columns of that name. Confirm which one callers
    expect.
    """
    # Imported locally so this block does not rely on the module's import list.
    import sqlite3
    from contextlib import closing

    if isinstance(date, str):
        date = datetime.fromisoformat(date)

    request_date_str = date.strftime("%Y-%m-%d")

    # The previous `with conn.connect(DB) as conn` read the local name `conn`
    # before it was bound and therefore always raised UnboundLocalError.
    # closing() is used because a sqlite3 connection's own context manager
    # only commits or rolls back; it does not close the connection.
    with closing(sqlite3.connect(DB)) as conn:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute('''
        SELECT DW.*, D.date AS last_updated FROM Days D
        JOIN DailyWeather DW ON D.daily_weather_id = DW.id
        WHERE D.date = ?
        ''', (request_date_str,))
        weather_data = cursor.fetchone()

        if not weather_data:
            return None

        weather_dict = dict(weather_data)

        cursor.execute('''
        SELECT HW.* FROM HourlyWeather HW
        JOIN Days D ON HW.day_id = D.id
        WHERE D.date = ?
        ''', (request_date_str,))
        hourly_data_rows = cursor.fetchall()

    # Keyed by the hourly 'datetime' column; rows sharing a key overwrite
    # each other, last one wins.
    weather_dict['hours'] = {
        row['datetime']: {
            'temperature': row['temp'],
            'conditions': row['conditions'],
            'wind': f"{row['windspeed']}mph {row['winddir']}",
            'feelslike': row['feelslike'],
            'precip': row['precip'],
            'precipprob': row['precipprob'],
            'snow': row['snow'],
            'humidity': row['humidity'],
            'dew': row['dew'],
            'uvindex': row['uvindex'],
            'solarradiation': row['solarradiation'],
            'icon': row['icon'],
        }
        for row in hourly_data_rows
    }

    return weather_dict
|
||||
|
||||
|
||||
def get_daily_weather_data(date: datetime):
    """Return the stored ``DailyWeather`` row for ``date`` as a dict.

    Args:
        date: Day to look up; formatted as ``YYYY-MM-DD`` for the query.

    Returns:
        A dict of the ``DailyWeather`` columns joined through ``Days``, or
        ``None`` when no row exists for that date.
    """
    # Imported locally so this block does not rely on the module's import list.
    import sqlite3
    from contextlib import closing

    request_date_str = date.strftime("%Y-%m-%d")

    # The previous `with conn.connect(DB) as conn` read the local name `conn`
    # before it was bound and always raised UnboundLocalError. closing()
    # also guarantees the connection is closed, which the sqlite3
    # connection context manager does not do.
    with closing(sqlite3.connect(DB)) as conn:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        cursor.execute('''
        SELECT DW.* FROM Days D
        JOIN DailyWeather DW ON D.daily_weather_id = DW.id
        WHERE D.date = ?
        ''', (request_date_str,))
        daily_weather_data = cursor.fetchone()

    if daily_weather_data is None:
        return None
    # Convert the sqlite3.Row into a plain dictionary.
    return dict(daily_weather_data)
|
||||
|
||||
|
||||
def get_hourly_weather_data(date: datetime):
    """Return the stored weather row for ``date`` as a dict, or ``None``.

    NOTE(review): despite the name, the query selects from ``DailyWeather``
    (joined through ``Days``) and never touches ``HourlyWeather``. That
    behavior is kept as-is here because the intended return shape for
    hourly data is not visible; confirm with callers before changing it.

    Args:
        date: Day to look up; formatted as ``YYYY-MM-DD`` for the query.

    Returns:
        A dict of the ``DailyWeather`` columns for that date, or ``None``
        when no row exists.
    """
    # Imported locally so this block does not rely on the module's import list.
    import sqlite3
    from contextlib import closing

    request_date_str = date.strftime("%Y-%m-%d")

    # The previous `with conn.connect(DB) as conn` read the local name `conn`
    # before it was bound and always raised UnboundLocalError. closing()
    # also guarantees the connection is closed, which the sqlite3
    # connection context manager does not do.
    with closing(sqlite3.connect(DB)) as conn:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        cursor.execute('''
        SELECT DW.* FROM Days D
        JOIN DailyWeather DW ON D.daily_weather_id = DW.id
        WHERE D.date = ?
        ''', (request_date_str,))
        daily_weather_data = cursor.fetchone()

    if daily_weather_data is None:
        return None
    return dict(daily_weather_data)
|
Loading…
Reference in a new issue