Initial commit

This commit is contained in:
sanj 2024-06-23 14:51:45 -07:00
commit 570d0026a8
55 changed files with 14031 additions and 0 deletions

56
.gitignore vendored Normal file
View file

@ -0,0 +1,56 @@
# Ignore specific data files and directories
sijapi/data/calendar.ics
sijapi/data/asr/
sijapi/data/geocoder/
sijapi/data/courtlistener/
sijapi/data/tts/
sijapi/data/db/
sijapi/data/sd/workflows/private
sijapi/data/*.pbf
sijapi/data/geonames.txt
sijapi/data/sd/images/
sijapi/config/O365/
sijapi/local_only/
sijapi/testbed/
**/.env
**/.config.yaml
**/*.log
**/logs/
**/__pycache__
**/.DS_Store
**/*.ics
**/*.sqlite
**/private/
**/*sync-conflict*.*
**/*.db
**/*.mp3
**/*.mp4
**/*.wav
**/*.pyc
**/.ipynb_checkpoints/
venv/
env/
.venv/
.vscode/
.idea/
*~
*.swp
*.swo
*.com
*.class
*.dll
*.exe
*.o
*.so
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
ehthumbs.db
Thumbs.db
sijapi/testbed/

17
README.md Normal file
View file

@ -0,0 +1,17 @@
```
.x+=:. . .
z` ^% @88> .. @88>
. <k %8P 888> .d`` %8P
.@8Ned8" . "8P u @8Ne. .u .
.@^%8888" .@88u . us888u. %8888:u@88N .@88u
x88: `)8b. ''888E` u888u. .@88 "8888" `888I 888. ''888E`
8888N=*8888 888E `'888E 9888 9888 888I 888I 888E
%8" R88 888E 888E 9888 9888 888I 888I 888E
@8Wou 9% 888E 888E 9888 9888 uW888L 888' 888E
.888888P` 888& 888E 9888 9888 '*88888Nu88P 888&
` ^"F R888" 888E "888*""888"~ '88888F` R888"
"" 888E ^Y" ^Y' 888 ^ ""
888E *8E
888P '8>
.J88" "
```

1
r2r Submodule

@ -0,0 +1 @@
Subproject commit c9e7c04a6bf9f8156cf793ee23379eb0f92f2d38

47
requirements.txt Normal file
View file

@ -0,0 +1,47 @@
python-dotenv
setuptools
PyPDF2
fastapi
pdf2image
pdfminer
pytesseract
python-dateutil
python-docx
hypercorn
starlette
httpx
pydantic
pytz
requests
aiohttp
paramiko
tailscale
pandas
pydub
torch
selenium
webdriver_manager
faster_whisper
filetype
html2text
markdown
ollama
aiofiles
bs4
imbox
newspaper3k
python-magic
urllib3
whisper
huggingface_hub
numpy
tqdm
tiktoken
numba
scipy
vectordb
IPython
torchaudio
lxml
lxml_html_clean
pdfminer.six

61
setup.py Normal file
View file

@ -0,0 +1,61 @@
from setuptools import setup, find_packages
# Runtime dependencies installed together with the package.
_INSTALL_REQUIRES = [
    "fastapi",
    "python-dotenv",
    "hypercorn",
    "setuptools",
    "PyPDF2",
    "pdf2image",
    "pdfminer",
    "pytesseract",
    "python-dateutil",
    "python-docx",
    "starlette",
    "httpx",
    "pydantic",
    "pytz",
    "requests",
    "aiohttp",
    "paramiko",
    "tailscale",
    "pandas",
    "pydub",
    "torch",
    "selenium",
    "webdriver_manager",
    "faster_whisper",
    "filetype",
    "html2text",
    "markdown",
    "ollama",
    "aiofiles",
    "bs4",
    "pdfminer.six",
    "lxml_html_clean",
    "imbox",
    "newspaper3k",
    "python-magic",
    "urllib3",
    "whisper",
    "huggingface_hub",
    "numpy",
    "tqdm",
    "tiktoken",
    "numba",
    "scipy",
    "vectordb",
    "IPython",
    "torchaudio",
]

# The `sijapi` console command dispatches to sijapi.__main__:main.
_ENTRY_POINTS = {
    "console_scripts": [
        "sijapi = sijapi.__main__:main",
    ],
}

setup(
    name="sijapi",
    version="0.1",
    packages=find_packages(),
    entry_points=_ENTRY_POINTS,
    install_requires=_INSTALL_REQUIRES,
)

92
sij.asc Normal file
View file

@ -0,0 +1,92 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
mQINBGY+fL4BEADCpz8FAfa6/7i9mEQCYlwwP2k9DlrUzz+u9BL4BmuoTEcGty9M
7EA2ivRxXo371IIMjL/GyAa8I3WHMEhxuRlGldUQaHzo6PicTn+OiLJ/g2vCfStN
jIYog3WC25P7Es1n1hDuOu8rUL93twXZ4NevgYx+G44M7Q+/1AbSXf83kpawlHhg
HcGmH2vt9UulfTGAvN9s2sH2pn89812lpWLSdPARNw09ePZy4RdiEgJ6t+S+wjaE
Ue/H4FcQC1MLrQnkW5soUOduY9HN0iUk/xZqqkRQctl3ds5oInE483vQsL0HKFvs
MB8lBdXTbVzxvpFe+fvT8d6hiZ/YgxIUEl1KZLDd3atqj+UREuG+LABZUKC4nSUP
EXneXUqi4qVCW9827K9/H+IKahe8OE+OrZAsSfLtsp4AznIxgyQbvpUZzCuRASJN
Kt1cjcJBOv5L0HJ8tVykZd23WuKUXiyxTs1MxsDGyjew30IsAg4WNO/iw9vBO/Yu
pfjlZTcgbghdIuNmOrnCyzKWtUxxfDtWwEBBshbTKusOaGhauBaHrRVE7lKlTblM
x1JIzYBziDmFy25J1XvYb3guilk1yy54poLQaEcE54mQYWHKCNS4eQeL5dJR3Nmu
Pt9GXdMyNO3uyog3WYpyYqch+osbBsHFVNUClxMycnyqZzHQeZHPNJBzJwARAQAB
tC5TYW5neWUgSW5jZS1Kb2hhbm5zZW4gKEF0dG9ybmV5KSA8c2lqQHNpai5sYXc+
iQJXBBMBCABBAhsDBQkHhh8tBQsJCAcCBhUKCQgLAgQWAgMBAh4BAheAFiEEMjqK
LEezdiJLNhO3U1smWu2+W0QFAmY+fPUCGQEACgkQU1smWu2+W0SwBQ/+L5S1fIop
6iQ/6gQENBNCUVgACWP0/ViJzQGo4iF3UZkV5KV8pgk/TenZSXCLxUj6UpSAe25m
vtrGV4NCL2hLn1NPK11Na6IM1ykfh/L67NKeCqmtQYwNLwW0o0fvUpK9fahPxhmv
EFo+lVCabQndgzmLxnUhxH4qkGSejsaSFoJQ6fVl/DExCL4w/R5rStnRMKDtkuF1
ONfjZpuLrAylx8Ypf/rocQYn5AJcRD5ZL2bGgDZNe85VNBFmD3b2cGSVpm3J6Rg/
fPfs1lgtpgXWbBDCF8nRY326Utbr3qoeZUXVQjVZ05Q2SpUYFHiDZJ3EFwQikg5n
cIBfcXQZQhTq/OK0eS0vB1li8m1ce9m8iMC+Pxe5toPkxFV5RO1+o5PG1SyOfzfV
F1c0O9JQqdJzRHoTuqLtVhlmRVBU2d6TjWYlZ6TwPShSTLu0Tkm4EeFJS4oag75d
q7LlIIvrWS4n3CqVpC/PEIUtclytkOkvNQaSWHEVkappS3UjkX1BJmaI8zXYh9jh
sV/5FckvwYnky+w6geFOBs34NW0rg9oNw4KNAywYcOPbI/Ev1z57my+MpA5msw+B
ww9sFC+tzQCSJl0FU2Dg2YMnyqfUtGr9HfXdAGuuUVh+cYFmEdwwZqBWl37pNIGL
SxfF1AdrlHCSpJcLVETe80UraMFAI7tyOwe0L1Nhbmd5ZSBJbmNlLUpvaGFubnNl
biA8c2FuZ3llaWpAd2VzdGVybmxhdy5vcmc+iQJUBBMBCAA+FiEEMjqKLEezdiJL
NhO3U1smWu2+W0QFAmY+fOgCGwMFCQeGHy0FCwkIBwIGFQoJCAsCBBYCAwECHgEC
F4AACgkQU1smWu2+W0RlnBAArwaFta9NTRdubTqctv1EET1D9OXAE/R5vdSk2jRQ
1CMYmv6KeMm0Rl7+dNFet/vJOEtITF7TZHnt7WBy7n5m+SIoARsaZYEchjZKsE2g
6RvRWqFGYuUYQWTRKsw0b2tT16BaNLKdV/w3ndRQNS6wDJrW1dRnIWxm4z26d3/H
Rt3o8+LUVxdSWGLliKZU00S+FNPVSwWe/X7+CoIE7T5XZL+OIEJ6DfpK2pkHKT6D
FswF3KOLG36vz5eISk4AT+o9AEoFIpX0hce3DMixEYQSgKN230K8RchC59bO81zE
w7Mic4vpn/wKFhicn+0BA1aJzzOd8iEwiA0p5baq4b2xIwCBiO4uv/HXR1SN1Tfk
QozjAGzl8LzrmwGTWOtOSk/7ckPhPR2MGNhMdtJ7rPeHxImJLh+/f4uBmYnQUdw4
0j3sMpJmrShW5dXJ8YHqVFfqabYD8HkBztdYI0qGJDpQjEbW6V+DvMWQXOZ8c1ul
NN2vZyY25RkypMQLiphImJa+q6eGtBEas40MeAkgQKIBPBBpb6W1km+m6UnOADKB
0/vOWcZMgijyMPp7WvwXbOwmXI27rHsUTvhFDLPI113a9I5bU8j6VyW2s/sst3Xc
OQDzEgR3KvD4dWjczIg6yliIq9eM5hskpsYyfDfWRWrIbR3Tg8XPwnQRB9dPEHIy
rKS0KVNhbmd5ZSBJbmNlLUpvaGFubnNlbiA8c2FuZ3llQHJpc2V1cC5uZXQ+iQJU
BBMBCAA+FiEEMjqKLEezdiJLNhO3U1smWu2+W0QFAmY+fQYCGwMFCQeGHy0FCwkI
BwIGFQoJCAsCBBYCAwECHgECF4AACgkQU1smWu2+W0SKGA//VRGpS7IwOOlHF7OI
+LEMDebLpLB2PswfWrK+sI9YdXXV/CaT0NcCz2HPCoK+coaDkl4cxh10ykVbjO36
wZc/rvhpzga2wMLpBLNwpTvSlfMwsCQeRQay498bgdR59gf1hYa/dPYKKrBgNxHa
Kc3dMDWU0adpV4zV1s/iFNQQZfmhUah+8TTlB03hahPzn8V7CqQF+jTfSXiWPv/V
eD1W6Sc1juvLTVxTThbM5ewiIhMP2t7KM+M4viOEqce79IcE2HTcpCaEI7Lh/Eld
9VBZZk/gENuPqyQuLbOIOQhC6LYRZkZC9Vv1FDutfWV5ZBPyaTY/n5pGW3lo+Tfa
FLSamQcD6dyiGm/ZyQbPUDt2aWhqRGr7VvvtfyXLazL9T9Y6ASr5UjLakPr5ihUz
B8InRch9ACPbu7QSIGFk9PQgHme2Cd/HMRLIALnkAmrafgDE+14Rlp9qI2nYhWdD
jkZcLalPXQCDBxUfj1q192Nn3wlKsDkDd2RWT7Mc2RJq2FR36KADPMtz2oJPSib4
eRgI40E9Wv+zqHDDTU2K/bLi3nmBHvKnXWXPyiBPVL+CAoAhkYHHJwNuRQfxlukq
heS4/CMBRB04foTeu2ltl6/sQdAIyBGKbOC6fMyhJFYbi16nWI6j7iw2XQnqyitu
jC8Pz14NfIAQTpKCVcV32Kn2k1+0I1Nhbmd5ZSBJbmNlLUpvaGFubnNlbiA8c2lq
QGVudi5lc3E+iQJUBBMBCAA+FiEEMjqKLEezdiJLNhO3U1smWu2+W0QFAmY+fRIC
GwMFCQeGHy0FCwkIBwIGFQoJCAsCBBYCAwECHgECF4AACgkQU1smWu2+W0Rbxw/+
OMYnlyXvo146+3M6JGdvW36CWmc9ZcmaU+xJM3FnG91WNo5J8MnHl0Ks9BwjNWtm
VJgFEdi2EVpSLJnYdQyJILCNt8RAclYvbFHYUOIDEEC2yr5ZKt/odwYAXPxaqQ4O
Sj7R2GbLA52O8zGWfARBAnAQycrlBRjItdpzGeWgRST8O/ot/IkU7xsAKW72E2VB
9jlCahp5c01lEideVqzVhk3z6GzVz1NUKsglgEOmTIjld4mMs+4GX/93q0u1erKO
I7Q6RL6lfdc2opGi5jFMXGWhLLgX2SSsBFJRuSQGnTpbx3XWFS5uA+cku7Fh0fC0
MKr2vsY18Z6OqU0MdQm6ovIVcvhzIdGfnBU9Ct98DMiUhDCmx3o9XneWj1n7kWKM
gT8s8AvE27tidtkZApwIKHdUy6qfyqwRjxE+KdL6Eh48x3TVYep+wfSfPJ1eq9Ne
7WWXKUx6FGNH01hpQdTLbCYqmwMa03och1wwyi+0wc8rHe6k6y2tURtP3mINkDeV
u1QmVaGRDA2r7oDm9UsFeupGsbFBnTkQIfJgnrLRJFfN2FDJPZDcd/VS71AOSL5C
jY+Dr/WHYPWeN8MHXfG4r/P41wsrnAJEAzSvLRQ9GYCLPe825W+uDJx9eMePodFa
BeIBcM633WXpbIXHnRQhPDfTzejCejO6GoPE7PbtBBi5Ag0EZj58vgEQAPUqNOgQ
kAPd/S+nkGAfvnzC5UD6lVWaQTVL/xU2K1J8l11K5Ck4nq7oMKhzEitu0lA+jG7q
JVwXMj9+rnoGlbIYmmxCZYpSit930Mss3HjYU8IAF4nybGwc5+wO77cldk3WJTI0
EkFgiM4Jk6Gk/tRf1LgMIfJIUgm8MooPLqg2z5Pj+bbwxw42A20enEwtF3ivEETJ
wuJwsp5uCOAfzOGqqBvp19PMTPynUBuwEXCkJfb0CCz+5yhjoi6ZjCVXxjuoe2wN
jFwoYd8odfSuvC6Fh9qqXnjF7HZLxEyN7K1L/y/sWarsN01zbUUI3kZlnTuamDu4
LdZtl2q3QqDyxmzHIWLTa1qL0s3WooB7JJqBYaNmQjLHadoktZ4vfhl7kjXYsg+i
84oipL83u2cRHplpqnRk9qVwNdW01EObjNafWY6t3942sM4e/yOdQiaXlxivPuHV
VYwme6K53lmGcV3ipMWRpNkme+oKV/TdYTTdlDaLgC8ga5AW6poNoSp5UpNeOs0E
mxIZivpRQSCr3g+jScy0RdX/+tI1gWe+2ZIHFwR+1WsXvLXHyd1wVyH4vDxSf1bE
VRVsXLZDT/xMGDzNzAC76kzoIykrcndFiTbNzB/LjZJuls6fRdN07bTcymWEKYiP
Ia6iGdag6+ueoX4eDzbjCvldKtkfr/EhB7MfABEBAAGJAjwEGAEIACYWIQQyOoos
R7N2Iks2E7dTWyZa7b5bRAUCZj58vgIbDAUJB4YfLQAKCRBTWyZa7b5bRLZdEACk
AaXNVeywC9+X6bdwkKV5Jl6Hv238cGd58TuVbjd+tii1JazbKEqCAr5tTlGtrUZg
fyjM0z5sMKDSZ15paX4xDbDs+xdfMxLVdjmFlZgwTrrTSIx3ODxPo/sSeyrzGZrQ
hlZjOHP1Bvln0OTQwK0yE3Eaip0FhIpJA5FX3yrZfvza3St5leNOXsZgEri68cgf
mVhS9tBD2I9TpCVwgq5vRnloAMgtQBYr8N9glXBfs2WsPhU96HSSH88osJW+lCkG
vTtzQBEjnnSQ/ssHBYz4DfpsJe1fbM+9WVow6q2nkUhqg5TfdAt4H0ra2uPXnNz8
lvQObVHlw7T0w5UTzgBdlCyYplyTG2gcZi+UWzit6YH9DH82j1otcq3+3NlrKwo0
TSJKZNagiqgJNZ1mhJQTt3JDacFFkBBxLf6trruuyInRU1leo87hzHCxIlMbQPqh
ogtV+W9FHElVJwoTQi8YF+0AacZPzK8wJmlPLxBeqs+ULJ8H5wZxlEBB1Jj91/W9
6R8m2IUZCsXNNpYU+f7uB8x0RUS3pU8S7GcwdJmOa16Xc4VdfWugm4TTEtajeSYC
ek5j/2s/QkAum5slT2Y6Aam0Jj/IhsGHKVEnR6DS01mZqVeeu0giPFUO4ZX5C0n9
mAmw/ZUGIOj6ls3KMBHv4pqQI7nd00tW8eIMgKGgKQ==
=PhPl
-----END PGP PUBLIC KEY BLOCK-----

251
sijapi/__init__.py Normal file
View file

@ -0,0 +1,251 @@
import ipaddress
import json
import logging
import multiprocessing
import os
import sys
import traceback
from pathlib import Path
from typing import Optional

from dateutil import tz
from dotenv import load_dotenv
from pydantic import BaseModel

from .logs import Logger
# from sijapi.config.config import load_config
# cfg = load_config()
### Initial initialization
# Every package path is anchored to the directory that contains this file.
BASE_DIR = Path(__file__).resolve().parent
CONFIG_DIR = BASE_DIR.joinpath("config")
ENV_PATH = CONFIG_DIR.joinpath(".env")
LOGS_DIR = BASE_DIR.joinpath("logs")

# Package-wide logger; the DEBUG/INFO/WARN/ERR/CRITICAL helpers write through LOGGER.
package_logger = Logger(__name__, LOGS_DIR)
LOGGER = package_logger.get_logger()
def DEBUG(log_message):
    """Log ``log_message`` at DEBUG level on the package logger."""
    LOGGER.debug(log_message)


def INFO(log_message):
    """Log ``log_message`` at INFO level on the package logger."""
    LOGGER.info(log_message)


def WARN(log_message):
    """Log ``log_message`` at WARNING level on the package logger."""
    LOGGER.warning(log_message)
def ERR(log_message):
    """Log ``log_message`` at ERROR level on the package logger.

    When called while an exception is being handled, the formatted traceback
    is logged as a second ERROR record. Outside of exception handling only the
    message is logged.
    """
    LOGGER.error(log_message)
    # traceback.format_exc() returns the placeholder "NoneType: None" when no
    # exception is active, so only log it when there is one to report.
    if sys.exc_info()[0] is not None:
        LOGGER.error(traceback.format_exc())
def CRITICAL(log_message):
    """Log ``log_message`` at CRITICAL level on the package logger.

    When called while an exception is being handled, the formatted traceback
    is logged as a second CRITICAL record. Outside of exception handling only
    the message is logged.
    """
    LOGGER.critical(log_message)
    # traceback.format_exc() returns the placeholder "NoneType: None" when no
    # exception is active, so only log it when there is one to report.
    if sys.exc_info()[0] is not None:
        LOGGER.critical(traceback.format_exc())
# Make sure the log directory exists, then load settings from the package .env file.
LOGS_DIR.mkdir(parents=True, exist_ok=True)
load_dotenv(dotenv_path=ENV_PATH)
### API essentials
# Comma-separated lists. Entries are stripped and blanks dropped, so an unset
# or empty variable yields [] rather than [''].
ROUTERS = [name.strip() for name in os.getenv('ROUTERS', '').split(',') if name.strip()]
PUBLIC_SERVICES = [path.strip() for path in os.getenv('PUBLIC_SERVICES', '').split(',') if path.strip()]
GLOBAL_API_KEY = os.getenv("GLOBAL_API_KEY")
# HOST_NET and HOST_PORT comprise HOST, which is what the server will bind to
HOST_NET = os.getenv("HOST_NET", "127.0.0.1")
HOST_PORT = int(os.getenv("HOST_PORT", 4444))
HOST = f"{HOST_NET}:{HOST_PORT}"
BASE_URL = os.getenv("BASE_URL", f"http://{HOST}")
# Parsed IP address objects, plus the literal hostname 'localhost'.
LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
TRUSTED_SUBNETS = [ipaddress.ip_network(subnet.strip()) for subnet in os.getenv('TRUSTED_SUBNETS', '127.0.0.1/32').split(',')]
# Default is half the available cores, capped at the core count. The lower
# bound of 1 matters on a single-core host, where cpu_count()/2 truncates to 0.
MAX_CPU_CORES = max(1, min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count()))
### Directories & general paths
HOME_DIR = Path.home()
ROUTER_DIR = BASE_DIR.joinpath("routers")
DATA_DIR = BASE_DIR.joinpath("data")
ALERTS_DIR = DATA_DIR.joinpath("alerts")
REQUESTS_DIR = LOGS_DIR.joinpath("requests")
REQUESTS_LOG_PATH = LOGS_DIR.joinpath("requests.log")
# Create the data, alerts and request-log directories if they are missing.
DATA_DIR.mkdir(parents=True, exist_ok=True)
ALERTS_DIR.mkdir(parents=True, exist_ok=True)
REQUESTS_DIR.mkdir(parents=True, exist_ok=True)
### Databases
DB = os.getenv("DB", 'sijdb')
DB_HOST = os.getenv("DB_HOST", "127.0.0.1")
DB_PORT = os.getenv("DB_PORT", 5432)
DB_USER = os.getenv("DB_USER", 'sij')
DB_PASS = os.getenv("DB_PASS")
DB_SSH = os.getenv("DB_SSH", "100.64.64.15")
DB_SSH_USER = os.getenv("DB_SSH_USER")
# This value used to be read only from "DB_SSH_ENV", which does not match the
# constant's name or the DB_SSH_USER convention. "DB_SSH_PASS" is now preferred
# and the old variable is still honoured so existing .env files keep working.
DB_SSH_PASS = os.getenv("DB_SSH_PASS") or os.getenv("DB_SSH_ENV")
# NOTE(review): DB_PASS is interpolated as-is; an unset password renders as "None".
DB_URL = f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB}'
### LOCATE AND WEATHER LOCALIZATIONS
USER_FULLNAME = os.environ.get('USER_FULLNAME')
USER_BIO = os.environ.get('USER_BIO')
# Time zone object looked up from the configured zone name.
TZ = tz.gettz(os.environ.get("TZ", "America/Los_Angeles"))
HOME_ZIP = os.environ.get("HOME_ZIP")  # unimplemented
LOCATION_OVERRIDES = DATA_DIR.joinpath("loc_overrides.json")
LOCATIONS_CSV = DATA_DIR.joinpath("US.csv")
# DB = DATA_DIR / "weatherlocate.db" # deprecated
VISUALCROSSING_BASE_URL = os.environ.get(
    "VISUALCROSSING_BASE_URL",
    "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline",
)
VISUALCROSSING_API_KEY = os.environ.get("VISUALCROSSING_API_KEY")
### Obsidian & notes
# The vault location comes from OBSIDIAN_BASE_DIR, else ~/Nextcloud/notes.
OBSIDIAN_VAULT_DIR = Path(os.environ.get("OBSIDIAN_BASE_DIR") or HOME_DIR.joinpath("Nextcloud", "notes"))
OBSIDIAN_JOURNAL_DIR = OBSIDIAN_VAULT_DIR.joinpath("journal")
# Resource and banner locations are plain strings relative to the vault.
OBSIDIAN_RESOURCES_DIR = "obsidian/resources"
OBSIDIAN_BANNER_DIR = OBSIDIAN_RESOURCES_DIR + "/banners"
Path(OBSIDIAN_VAULT_DIR).joinpath(OBSIDIAN_BANNER_DIR).mkdir(parents=True, exist_ok=True)
OBSIDIAN_BANNER_SCENE = os.environ.get("OBSIDIAN_BANNER_SCENE", "wallpaper")
OBSIDIAN_CHROMADB_COLLECTION = os.environ.get("OBSIDIAN_CHROMADB_COLLECTION", "obsidian")
DOC_DIR = DATA_DIR.joinpath("docs")
DOC_DIR.mkdir(parents=True, exist_ok=True)
### DATETIME SCHEMA FOR DAILY NOTE FOLDER HIERARCHY FORMATTING ###
# No defaults: each of these is None when its variable is unset.
YEAR_FMT = os.environ.get("YEAR_FMT")
MONTH_FMT = os.environ.get("MONTH_FMT")
DAY_FMT = os.environ.get("DAY_FMT")
DAY_SHORT_FMT = os.environ.get("DAY_SHORT_FMT")
### Large language model
LLM_URL = os.environ.get("LLM_URL", "http://localhost:11434")
# The system prompt is read from the SYSTEM_MSG variable.
LLM_SYS_MSG = os.environ.get("SYSTEM_MSG", "You are a helpful AI assistant.")
# NOTE(review): SUMMARY_INSTRUCT is assigned again in the Summarization section
# further down this module, so the value set here does not survive import.
SUMMARY_INSTRUCT = os.environ.get('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
SUMMARY_INSTRUCT_TTS = os.environ.get('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
# Default model and voice names.
DEFAULT_LLM = os.environ.get("DEFAULT_LLM", "dolphin-mistral")
DEFAULT_VISION = os.environ.get("DEFAULT_VISION", "llava")
DEFAULT_VOICE = os.environ.get("DEFAULT_VOICE", "Luna")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
### Stable diffusion
SD_IMAGE_DIR = DATA_DIR / "sd" / "images"
os.makedirs(SD_IMAGE_DIR, exist_ok=True)
SD_WORKFLOWS_DIR = DATA_DIR / "sd" / "workflows"
os.makedirs(SD_WORKFLOWS_DIR, exist_ok=True)
COMFYUI_URL = os.getenv('COMFYUI_URL', "http://localhost:8188")
# Path(None) raises TypeError, which used to abort the whole package import
# when COMFYUI_DIR was not set. Both paths are None in that case instead.
COMFYUI_DIR = Path(os.getenv('COMFYUI_DIR')) if os.getenv('COMFYUI_DIR') else None
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output' if COMFYUI_DIR is not None else None
COMFYUI_LAUNCH_CMD = os.getenv('COMFYUI_LAUNCH_CMD', 'mamba activate comfyui && python main.py')
SD_CONFIG_PATH = CONFIG_DIR / 'sd.json'
# Explicit encoding so the JSON parses the same regardless of platform default.
with open(SD_CONFIG_PATH, 'r', encoding='utf-8') as SD_CONFIG_file:
    SD_CONFIG = json.load(SD_CONFIG_file)
### Summarization
SUMMARY_CHUNK_SIZE = int(os.environ.get("SUMMARY_CHUNK_SIZE", 4000))  # measured in tokens
SUMMARY_CHUNK_OVERLAP = int(os.environ.get("SUMMARY_CHUNK_OVERLAP", 100))  # measured in tokens
SUMMARY_TPW = float(os.environ.get("SUMMARY_TPW", 1.3))  # measured in tokens
SUMMARY_LENGTH_RATIO = int(os.environ.get("SUMMARY_LENGTH_RATIO", 4))  # measured as original to length ratio
SUMMARY_MIN_LENGTH = int(os.environ.get("SUMMARY_MIN_LENGTH", 150))  # measured in tokens
# This assignment is the one in effect after import; it replaces the value set
# in the Large language model section above.
SUMMARY_INSTRUCT = os.environ.get("SUMMARY_INSTRUCT", "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
SUMMARY_MODEL = os.environ.get("SUMMARY_MODEL", "llama3")
SUMMARY_TOKEN_LIMIT = int(os.environ.get("SUMMARY_TOKEN_LIMIT", 4096))
### ASR
ASR_DIR = DATA_DIR.joinpath("asr")
ASR_DIR.mkdir(parents=True, exist_ok=True)
# The variable's text is appended to the home directory; str() is applied even
# when it is unset.
WHISPER_CPP_DIR = HOME_DIR.joinpath(str(os.environ.get("WHISPER_CPP_DIR")))
WHISPER_CPP_MODELS = os.environ.get('WHISPER_CPP_MODELS', 'NULL,VOID').split(',')
### TTS
PREFERRED_TTS = os.environ.get("PREFERRED_TTS", "None")
TTS_DIR = DATA_DIR.joinpath("tts")
VOICE_DIR = TTS_DIR.joinpath('voices')
PODCAST_DIR = TTS_DIR.joinpath("sideloads")
TTS_OUTPUT_DIR = TTS_DIR.joinpath('outputs')
TTS_SEGMENTS_DIR = TTS_DIR.joinpath('segments')
# Create the TTS directory tree if it is missing.
TTS_DIR.mkdir(parents=True, exist_ok=True)
VOICE_DIR.mkdir(parents=True, exist_ok=True)
PODCAST_DIR.mkdir(parents=True, exist_ok=True)
TTS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
TTS_SEGMENTS_DIR.mkdir(parents=True, exist_ok=True)
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
### Calendar & email account
# Each toggle is on only when its variable is exactly the string "True".
MS365_TOGGLE = os.getenv("MS365_TOGGLE") == "True"
ICAL_TOGGLE = os.getenv("ICAL_TOGGLE") == "True"
ICS_PATH = DATA_DIR.joinpath('calendar.ics')  # deprecated now, but maybe revive?
ICALENDARS = os.environ.get('ICALENDARS', 'NULL,VOID').split(',')
class IMAP_DETAILS(BaseModel):
    """Connection settings for a mail account (IMAP and SMTP ports on one host)."""

    email: str
    password: str
    host: str
    imap_port: int
    smtp_port: int
    # Declared Optional so that None is valid both as the default and when it
    # is passed explicitly; a plain `str` annotation rejects an explicit None
    # under pydantic v2 validation.
    imap_encryption: Optional[str] = None
    smtp_encryption: Optional[str] = None
# Mail account settings assembled from the environment.
IMAP = IMAP_DETAILS(
    email=os.environ.get('IMAP_EMAIL'),
    password=os.environ.get('IMAP_PASSWORD'),
    host=os.environ.get('IMAP_HOST', '127.0.0.1'),
    imap_port=int(os.environ.get('IMAP_PORT', 1143)),
    smtp_port=int(os.environ.get('SMTP_PORT', 469)),
    imap_encryption=os.environ.get('IMAP_ENCRYPTION'),
    smtp_encryption=os.environ.get('SMTP_ENCRYPTION'),
)
# Comma-separated address patterns. Blank entries are dropped: ''.split(',')
# yields [''], and an empty pattern would match any address if these lists are
# applied by substring matching (presumably the case, given fragments such as
# "no-reply@" below — verify against the consumer).
AUTORESPONSE_WHITELIST = [entry.strip() for entry in os.getenv('AUTORESPONSE_WHITELIST', '').split(',') if entry.strip()]
AUTORESPONSE_BLACKLIST = [entry.strip() for entry in os.getenv('AUTORESPONSE_BLACKLIST', '').split(',') if entry.strip()]
# Entries that are always blacklisted, whatever the environment says.
AUTORESPONSE_BLACKLIST.extend(["no-reply@", "noreply@", "@uscourts.gov", "@doi.gov"])
AUTORESPONSE_CONTEXT = os.getenv('AUTORESPONSE_CONTEXT', None)
# Auto-response is enabled exactly when a context string is configured.
AUTORESPOND = AUTORESPONSE_CONTEXT is not None
### Courtlistener & other webhooks
COURTLISTENER_DOCKETS_DIR = DATA_DIR.joinpath("courtlistener", "dockets")
COURTLISTENER_SEARCH_DIR = DATA_DIR.joinpath("courtlistener", "cases")
# Create both storage directories if they are missing.
COURTLISTENER_DOCKETS_DIR.mkdir(parents=True, exist_ok=True)
COURTLISTENER_SEARCH_DIR.mkdir(parents=True, exist_ok=True)
CASETABLE_PATH = DATA_DIR.joinpath("courtlistener", "cases.json")
COURTLISTENER_API_KEY = os.environ.get("COURTLISTENER_API_KEY")
COURTLISTENER_BASE_URL = os.environ.get("COURTLISTENER_BASE_URL", "https://www.courtlistener.com")
# Fixed URL; it does not follow COURTLISTENER_BASE_URL.
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
### Keys & passwords
PUBLIC_KEY_FILE = os.getenv("PUBLIC_KEY_FILE", 'you_public_key.asc')
# The key file is looked up next to the package directory. A missing file used
# to raise FileNotFoundError and abort the package import; PUBLIC_KEY is now
# None in that case.
PUBLIC_KEY = (BASE_DIR.parent / PUBLIC_KEY_FILE).read_text() if (BASE_DIR.parent / PUBLIC_KEY_FILE).is_file() else None
MAC_ID = os.getenv("MAC_ID")
MAC_UN = os.getenv("MAC_UN")
MAC_PW = os.getenv("MAC_PW")
TIMING_API_KEY = os.getenv("TIMING_API_KEY")
TIMING_API_URL = os.getenv("TIMING_API_URL", "https://web.timingapp.com/api/v1")
PHOTOPRISM_URL = os.getenv("PHOTOPRISM_URL")
PHOTOPRISM_USER = os.getenv("PHOTOPRISM_USER")
PHOTOPRISM_PASS = os.getenv("PHOTOPRISM_PASS")
### Tailscale
# ipaddress.ip_address("NULL") raises ValueError, so the previous "NULL"
# default aborted the package import whenever TS_IP was unset. An unset, empty
# or literal "NULL" value now gives None, mirroring how TS_SUBNET is handled.
TS_IP = ipaddress.ip_address(os.getenv("TS_IP")) if os.getenv("TS_IP", "NULL") not in ("", "NULL") else None
TS_SUBNET = ipaddress.ip_network(os.getenv("TS_SUBNET")) if os.getenv("TS_SUBNET") else None
TS_ID = os.getenv("TS_ID", "NULL")
TS_TAILNET = os.getenv("TS_TAILNET", "NULL")
TS_ADDRESS = f"http://{TS_ID}.{TS_TAILNET}.ts.net"
### Cloudflare
CF_API_BASE_URL = os.environ.get("CF_API_BASE_URL")
CF_TOKEN = os.environ.get("CF_TOKEN")
CF_IP = DATA_DIR.joinpath("cf_ip.txt")  # to be deprecated soon
CF_DOMAINS_PATH = DATA_DIR.joinpath("cf_domains.json")  # to be deprecated soon
### Caddy - not fully implemented
# BASE_URL is deliberately not re-read here. It is already resolved in the API
# essentials section with an http://HOST default, and reading it again without
# a default replaced that default with None whenever the variable was unset.
CADDY_SERVER = os.getenv('CADDY_SERVER', None)
# The Caddyfile path is only meaningful when a Caddy server is configured.
CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None
CADDY_API_KEY = os.getenv("CADDY_API_KEY")
### Microsoft Graph
MS365_CLIENT_ID = os.getenv('MS365_CLIENT_ID')
MS365_SECRET = os.getenv('MS365_SECRET')
MS365_TENANT_ID = os.getenv('MS365_TENANT_ID')
MS365_CERT_PATH = CONFIG_DIR / 'MS365' / '.cert.pem' # deprecated
MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated
# A missing key file used to raise FileNotFoundError and abort the package
# import, even though the path is marked deprecated. MS365_KEY is None then.
MS365_KEY = MS365_KEY_PATH.read_text() if MS365_KEY_PATH.is_file() else None
MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt'
MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT')
MS365_LOGIN_URL = os.getenv("MS365_LOGIN_URL", "https://login.microsoftonline.com")
# Authority URL is the login URL followed by the tenant id.
MS365_AUTHORITY_URL = f"{MS365_LOGIN_URL}/{MS365_TENANT_ID}"
MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect")
MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access').split(',')
### Maintenance
# Both values are expressed in seconds.
GARBAGE_COLLECTION_INTERVAL = 1 * 60 * 60  # Run cleanup every hour
GARBAGE_TTL = 24 * 60 * 60  # Delete files older than 24 hours

146
sijapi/__main__.py Executable file
View file

@ -0,0 +1,146 @@
#!/Users/sij/miniforge3/envs/api/bin/python
from fastapi import FastAPI, Request, HTTPException, Response
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import ClientDisconnect
from hypercorn.asyncio import serve
from hypercorn.config import Config
import sys
import asyncio
import httpx
import argparse
import json
import ipaddress
import importlib
from dotenv import load_dotenv
from pathlib import Path
from datetime import datetime
import argparse
from . import LOGGER, LOGS_DIR
from .logs import Logger
# Command-line options are parsed at import time, before the app is built.
parser = argparse.ArgumentParser(description="Personal API.")
parser.add_argument(
    "--debug",
    action="store_true",
    help="Set log level to INFO",
)
parser.add_argument(
    "--test",
    type=str,
    help="Load only the specified module.",
)
args = parser.parse_args()

# A separate "main" Logger receives the parsed options; the records below are
# written through the package-level LOGGER.
main_logger = Logger("main", LOGS_DIR)
main_logger.setup_from_args(args)
logger = LOGGER

# Emit one record at DEBUG and one at INFO.
logger.debug("Debug Log")
logger.info("Info Log")
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOST, ENV_PATH, GLOBAL_API_KEY, REQUESTS_DIR, ROUTER_DIR, REQUESTS_LOG_PATH, PUBLIC_SERVICES, TRUSTED_SUBNETS, ROUTERS
# The FastAPI application that routers and middleware are attached to.
api = FastAPI()

# CORS: every origin, method and header is accepted, with credentials allowed.
api.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
class SimpleAPIKeyMiddleware(BaseHTTPMiddleware):
    """Require the global API key for requests from outside the trusted subnets.

    A request is passed straight through when its method is OPTIONS, when its
    path is listed in PUBLIC_SERVICES, or when the client address lies inside
    one of TRUSTED_SUBNETS. Any other request must present GLOBAL_API_KEY,
    either in the ``Authorization`` header (with or without a ``Bearer ``
    prefix) or in the ``api_key`` query parameter; otherwise a 401 JSON
    response is returned.
    """

    @staticmethod
    def _client_is_trusted(request):
        """Return True when the client's address falls in a trusted subnet."""
        # request.client can be None, and the host is not guaranteed to be an
        # IP literal; both cases are treated as untrusted rather than raising.
        if request.client is None:
            return False
        try:
            client_ip = ipaddress.ip_address(request.client.host)
        except ValueError:
            return False
        return any(client_ip in subnet for subnet in TRUSTED_SUBNETS)

    @staticmethod
    def _key_is_valid(candidate):
        """Return True when ``candidate`` equals the configured API key."""
        import hmac

        # With no key configured nothing can match. Comparing None to None
        # would otherwise authorize requests that carry no key at all.
        if not candidate or not GLOBAL_API_KEY:
            return False
        # Constant-time comparison; encode first because compare_digest only
        # accepts ASCII-only str values.
        return hmac.compare_digest(candidate.encode("utf-8"), GLOBAL_API_KEY.encode("utf-8"))

    async def dispatch(self, request: Request, call_next):
        """Authorize the request, then hand it to the next handler."""
        if request.method == "OPTIONS":
            # Allow CORS preflight requests: let the downstream CORS handling
            # answer them. The previous JSONResponse(status_code=200) omitted
            # the required `content` argument and raised TypeError.
            return await call_next(request)

        if request.url.path not in PUBLIC_SERVICES and not self._client_is_trusted(request):
            api_key_header = request.headers.get("Authorization")
            api_key_query = request.query_params.get("api_key")
            if api_key_header:
                api_key_header = api_key_header.strip()
                # Strip the scheme case-insensitively but keep the key's own
                # case; lower-casing the whole header broke mixed-case keys.
                if api_key_header[:7].lower() == "bearer ":
                    api_key_header = api_key_header[7:].strip()
            if not (self._key_is_valid(api_key_header) or self._key_is_valid(api_key_query)):
                ERR(f"Invalid API key provided by a requester.")
                return JSONResponse(
                    status_code=401,
                    content={"detail": "Invalid or missing API key"}
                )

        response = await call_next(request)
        return response
# Register the API-key check on the application.
api.add_middleware(SimpleAPIKeyMiddleware)
# Disabled request-logging middleware, kept as an inert string literal: it is
# only assigned here, and nothing in the visible code evaluates it.
canceled_middleware = """
@api.middleware("http")
async def log_requests(request: Request, call_next):
DEBUG(f"Incoming request: {request.method} {request.url}")
DEBUG(f"Request headers: {request.headers}")
DEBUG(f"Request body: {await request.body()}")
response = await call_next(request)
return response
async def log_outgoing_request(request):
INFO(f"Outgoing request: {request.method} {request.url}")
DEBUG(f"Request headers: {request.headers}")
DEBUG(f"Request body: {request.content}")
"""
@api.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Log an HTTPException with its request, then return it as JSON.

    The response keeps the exception's status code and carries its detail
    under the "detail" key.
    """
    ERR(f"HTTP Exception: {exc.status_code} - {exc.detail}")
    ERR(f"Request: {request.method} {request.url}")
    payload = {"detail": exc.detail}
    return JSONResponse(content=payload, status_code=exc.status_code)
@api.middleware("http")
async def handle_exception_middleware(request: Request, call_next):
    """Turn a Content-Length overrun raised downstream into a 500 response.

    Any other RuntimeError is re-raised unchanged.
    """
    try:
        return await call_next(request)
    except RuntimeError as exc:
        if str(exc) != "Response content longer than Content-Length":
            raise
        # call_next raised, so no response object exists whose headers could
        # be corrected. The previous code touched the unbound `response` here
        # and failed with UnboundLocalError. Report the failure explicitly.
        ERR(f"Response content longer than Content-Length for {request.method} {request.url}")
        return JSONResponse(
            status_code=500,
            content={"detail": "Response content longer than Content-Length"}
        )
def load_router(router_name):
    """Import ``sijapi.routers.<router_name>`` and attach its router to the app.

    The module is expected to expose an attribute with the same name as the
    module; that object is passed to ``api.include_router``. A missing file is
    logged as an error, and an import or attribute failure is logged as
    critical; neither is raised to the caller.

    Args:
        router_name: Module name inside the routers directory, without ".py".
    """
    # Names come from a comma-split setting or the command line, so tolerate
    # surrounding whitespace and skip blanks instead of probing for ".py".
    router_name = router_name.strip() if router_name else ""
    if not router_name:
        DEBUG("Skipping blank router name.")
        return

    router_file = ROUTER_DIR / f'{router_name}.py'
    DEBUG(f"Attempting to load {router_name.capitalize()}...")
    if not router_file.exists():
        ERR(f"Router file for {router_name} does not exist.")
        return

    module_path = f'sijapi.routers.{router_name}'
    try:
        module = importlib.import_module(module_path)
        router = getattr(module, router_name)
        api.include_router(router)
        INFO(f"{router_name.capitalize()} router loaded.")
    except (ImportError, AttributeError) as e:
        CRITICAL(f"Failed to load router {router_name}: {e}")
def main(argv=None):
    """Load the routers and serve the API with Hypercorn until it exits.

    With ``--test NAME`` only that router is loaded; otherwise every name in
    ROUTERS is loaded. The server binds to HOST.

    Args:
        argv: Accepted for compatibility and not read; the command line is
            parsed at module import into ``args``. It defaults to None so the
            ``sijapi`` console script, which calls ``main()`` with no
            arguments, can start the server.
    """
    if args.test:
        load_router(args.test)
    else:
        CRITICAL(f"sijapi launched")
        # Log the parsed options; `args._get_args` without a call only
        # rendered the bound method's repr.
        CRITICAL(f"{vars(args)}")
        for router_name in ROUTERS:
            load_router(router_name)

    config = Config()
    config.keep_alive_timeout = 1200
    config.bind = [HOST]
    asyncio.run(serve(api, config))


if __name__ == "__main__":
    main(sys.argv[1:])

496
sijapi/config/.env-example Normal file
View file

@ -0,0 +1,496 @@
#──────────────────────────────────────────────────────────────────────────────────
# C O N F I G U R A T I O N F I L E
#──────────────────────────────────────────────────────────────────────────────────
#
# Hi friend! You've found my hidden .env-example file. Do you like Zalgo
# text and old-school ASCII art? I bet you do. So listen, this'll be your method
# for configuring sijapi, and nothing works until you at least:
#
# (1) fill in the ESSENTIALS category, and
#
# (2) rename this file `.env`
#
# ... and even then, certain features will not work until you set other
# relevant variables below.
#
# So get yourself a beverage, put on some sick beats, and settle in for a vibe-y
# configuration sesh. Remember to read my detailed notes if you ever feel lost,
# and most important, remember:
#
# † you are NOT alone,
# † I love you SO much,
# † and you are S̸̢̟̑̒̊ͅō̸͔͕͎̟͜ worthy.
#
# y o u r b f & b f 4 e ,
# .x+=:. . .
# z` ^% @88> .. †††>
# . <k %8P 888> .d`` %†P
# .@8Ned8" . "8P u @8Ne. .u .
# .@^%8888" .@88u . us888u. %8888:u@88N .@88u
# x88: `)8b. ''888E` u888u. .@88 "8888" `888I 888. ''888E`
# ~ 8888N=*8888 888E `'888E 9888 9888 888I 888I 888E
# %8" R88 888E 888E 9888 9888 888I 888I 888E
# @8Wou 9% 888E 888E 9888 9888 uW888L 888' 888E
# .888888P` 888& 888E 9888 9888 '*88888Nu88P 888&
# ` ^"F R888" 888E "888*""888" ~ '88888F` R888"
# "" 888E ^Y" ^Y' 888 ^ ""
# 888E *8E
# 888P '8>
# .J88" " "
#
#
# B U T I H E A R Y O U :
# L E T ' S T A K E I T S L O W A N D
# ───────────── S̢͉̺ T̪͔͓ A͇̞ R̘͕͙ T̢̡͉ W͚̻ I͉͇͜ T̟͖̺ H̡͚͙ T̺̞̠ H̢̢̙ E̢̪͓ ──────────────
#
# ███████╗███████╗███████╗███████╗███╗ ██╗████████╗██╗ █████╗ ██╗ ███████╗
# ██╔════╝██╔════╝██╔════╝██╔════╝████╗ ██║╚══██╔══╝██║██╔══██╗██║ ██╔════╝
# █████╗ ███████╗███████╗█████╗ ██╔██╗ ██║ ██║ ██║███████║██║ ███████╗
# ██╔══╝ ╚════██║╚════██║██╔══╝ ██║╚██╗██║ ██║ ██║██╔══██║██║ ╚════██║
# ███████╗███████║███████║███████╗██║ ╚████║ ██║ ██║██║ ██║███████╗███████║
# ╚══════╝╚══════╝╚══════╝╚══════╝╚═╝ ╚═══╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝
# ─────────────────────────────────────────────────────────────────
#
#─── first, bind an ip address and port : ──────────────────────────────────────────
HOST_NET=0.0.0.0
HOST_PORT=4444
BASE_URL=http://localhost:4444 # <--- replace with base URL of reverse proxy, etc
#─── notes: ──────────────────────────────────────────────────────────────────────
#
# HOST_NET† and HOST_PORT comprise HOST and determine the ip and port the server binds to.
# BASE_URL is used to assemble URLs, e.g. in the MS authentication flow and for serving images generated on the sd router.
# BASE_URL should match the base URL used to access sijapi sans endpoint, e.g. http://localhost:4444 or https://api.sij.ai
#
# † Take care here! Please ensure you understand the implications of setting HOST_NET to anything besides 127.0.0.1, and configure your firewall and router appropriately if you do. Setting HOST_NET to 0.0.0.0, for instance, opens sijapi to any device the server running it is accessible to — including potentially frightening internet randos (depending how your firewall, router, and NAT are configured).
#
# Here are a few options to consider to more securely enable access from
# other devices:
#
# (1) if all access can occur over Tailscale, either:
# (a) leave HOST_NET set to 127.0.0.1, run `tailscale cert $(tailscale
# whois $(tailscale ip | head -n 1) | awk '/Name:/ {print $2}')`
# if you haven't already issued yourself a TLS certificate on
# Tailscale, and then run `tailscale serve --bg --https=4443
# 4444` to expose sijapi to your other tailscale-enabled devices
# at `https://{device.magicdns-domain.net}:4443`; or
# (b) set HOST_NET to your server's Tailscale IP (this should work
# but for me doesn't reliably)
#
# (2) if WAN access truly is required, leave HOST_NET set to 127.0.0.1 and
# configure either:
# (a) a Cloudflare tunnel, or
# (b) a reverse proxy with HTTPS (Caddy is excellent for this).
#
# And please be sure to set a strong API key either way but especially for (2).
# ──────────
#
#──── configure API key authorization and select exemptions──────────────────begin
GLOBAL_API_KEY=¿SECRET? # <--- specify a key to unlock the API
PUBLIC_SERVICES=/id,/ip,/health,/img/,/cl/dockets,/cl/search,/cd/alert
TRUSTED_SUBNETS=127.0.0.1/32,10.13.37.0/24,100.64.64.0/24
#─── notes: ───────────────────────────────────────────────────────────────────end
#
# GLOBAL_API_KEY determines the API key that will be required to access all endpoints, except access to PUBLIC_SERVICES or from TRUSTED_SUBNETS. Authentication is made via an `Authorization: Bearer {GLOBAL_API_KEY}` header.
# TRUSTED_SUBNETS might commonly include 127.0.0.1/32 (localhost), 100.x.x.0/24 (Tailscale tailnet), and/or 192.168.x.0/24 or 10.x.x.0/24 (local network).
# When configuring a reverse proxy or Cloudflare tunnel, please verify traffic through it does not appear to sijapi (i.e. in ./logs) as though it were coming from any of the subnets specified here. For sij, using Caddy, it does not, but your setup may differ.
# ──────────
#
#─── router selection: ────────────────────────────────────────────────────────────
ROUTERS=asr,calendar,cf,email,health,hooks,llm,locate,note,rag,sd,serve,summarize,time,tts,weather
UNLOADED=ig
#─── notes: ──────────────────────────────────────────────────────────────────────
#
# ROUTERS determines which routers are loaded.†
#
# UNLOADED is not used directly -- it's just there to help keep track which routers are disabled.
#
# † ┓ ┏ orth bearing in mind: some routers inherently rely on other routers,
# ┃┃┃ 3rd party APIs, or other apps being installed locally. If a router is
# ┗┻┛ set to load (i.e. is included in ROUTERS) and it depends on another router,
# that other router will also load too irrespective of whether it's listed.
#
# B U T L E T ' S G E T D O W N T O
# , S H A L L W E ?
#
# asr: requires faster_whisper — $ pip install faster_whisper — and
# downloading the model file specified in ASR_DEFAULT_MODEL.
#
# calendar: requires (1) a Microsoft 365 account with a properly configured
# Azure Active Directory app, and/or (2) Calendars on macOS.
#
# cf: interfaces with the Cloudflare API and Caddy to register new
# [sub-]domains on Cloudflare and deploy them with Caddy as
# reverse proxy.
#
# llm: requires ollama — $ pip install ollama — and downloading the
# models set in LLM_DEFAULT_MODEL and LLM_VISION_MODEL.
#
# email: email auto-responders and summarizers to be found here. Designed
# for use with IMAP.
#
# hooks: designed for two specific use cases: monitoring court dockets
# through CourtListener.org, and monitoring arbitrary web pages for
# changes in tandem with a self-hosted changedetection.io instance.
# Both require accounts; other functionality would require
# additional / modified code.
#
# ig: requires an Instagram account, with credentials and other settings
# configured separately in the ig_config.json file; relies heavily
# on the llm and sd routers which have their own dependencies.
#
# locate: some endpoints work as is, but the core location tracking
# functionality requires PostgreSQL with the PostGIS extension and is
# designed specifically to pair with a mobile device where
# Pythonista is installed and configured to run the
# `gps_tracker.py` and `gps_upload.py` scripts periodically or on a
# repeating condition (e.g. via automation under Apple Shortcuts).
#
# note: designed for use with Obsidian plus the Daily Notes and Tasks
# core extensions; and the Admonitions, Banners, Icons (with the
# Lucide pack), and Make.md community extensions. Moreover `notes`
# relies heavily on the calendar, llm, locate, sd, summarize, time,
# tts, and weather routers and accordingly on the external
# dependencies of each.
#
# sd: requires ComfyUI plus any modules and StableDiffusion models
# set in sd_config and individual workflow .json files.
#
# summarize: relies on the llm router and thus requires ollama.
#
# time: requires the subscription-based macOS app 'Timing' (one of many
# apps that together make SetApp an incredible value for macOS users!)
#
# tts: designed for use with coqui — $ pip install coqui — and/or the
# ElevenLabs API.
#
# weather: requires a VisualCrossing API key and is designed for (but doesn't
# itself strictly require) Postgresql with the PostGIS extension;
# (... but it presently relies on the locate router, which does).
#
#
# ... Whew! that was a lot, right? I'm so glad we're in this together...
# ──────────
#
#───────── W H A T A R E Y O U R D I G I T S , H O N E Y B U N ? ────────
# LOCALIZATION
#─── what are your digits, honey-bun?: ──────────────────────────────────────────────
TZ=America/Los_Angeles
HOME_ZIP=97401
#─── notes: ─────────────────────────────────────────────────────────────────────────
#
# ──────────
#
#─────────────────────── Y ₒ ᵤ ' ᵣ ₑ G ₒ ₙ ₙ ₐ ₗ ₒ ᵥ ₑ ────────────────────────
#
# ░ ░░ ░░ ░ ░░░░░░░░ ░░░ ░░░ ░░ ░░░░░░░ ░
# ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒
# ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓▓▓▓▓ ▓▓ ▓▓▓▓▓▓▓ ▓▓▓▓ ▓ ▓▓▓▓▓▓▓ ▓▓▓
# ████ ████ ████ ████ █████████████ █ ████ █ █ ███████ ███████
# ████ ████ ████ █ █ ██ ███ ██ ████ █ █ █
#
# A N D I ' M N O T. E V E N. J E A L O U S.
# Y O U D E S E R V E I T A L L , B A B Y C A K E S.
#
#─── use tailscale for secure remote access: ───────────────────────────────────────
TS_IP=100.13.37.5 # <--- enter your own TS IP address
TS_SUBNET=100.13.37.0/24 # <--- enter your own TS subnet (IPv4/CIDR)
TS_ID=¿SECRET? # <--- enter your own TS device name
TS_TAILNET=screaming_sailfin # <--- enter your own TS tailnet / MagicDNS name
TAILSCALE_API_KEY=¿SECRET? # <--- enter your own TS API key
#─── notes: ────────────────────────────────────────────────────────────────────────
#
# TS_IP should match the Tailscale IP of the device. But this is deprecated, and if the functionality becomes relevant again, it should come back in the form of a dynamic check (`tailscale status` in a shell subprocess) in __init__.py or even the /id endpoint.
# TS_SUBNET should match the IP/CIDR-format tailnet
# TS_ID currently has two roles: it's used to assemble the complete MagicDNS of the server, and it determines what the /id endpoint on the health router returns. This is relevant where multiple servers run the script behind a load balancer (e.g. Caddy), as a means to check which server responds. Bear in mind that /id is NOT API key-protected by default here.
# TS_TAILNET should match the tailnet's MagicDNS domain (omitting the `.net`, for reasons)
# ──────────
#
#──────────── ᵁ & ᴹ ᴱ , W E C A N G E T T H R O U G H ────────────────────
#
# ██▓███ ▒█████ ██████ ▄▄▄█████▓ ▄████ ██▀███ ▓█████ ██████
# ▓██░ ██▒██▒ ██▒▒██ ▒ ▓ ██▒ ▓▒ ██▒ ▀█▒▓██ ▒ ██▒▓█ ▀ ▒██ ▒
# ▓██░ ██▓▒██░ ██▒░ ▓██▄ ▒ ▓██░ ▒░▒██░▄▄▄░▓██ ░▄█ ▒▒███ ░ ▓██▄
# ▒██▄█▓▒ ▒██ ██░ ▒ ██▒░ ▓██▓ ░ ░▓█ ██▓▒██▀▀█▄ ▒▓█ ▄ ▒ ██▒
# ▒██▒ ░ ░ ████▓▒░▒██████▒▒ ▒██▒ ░ ░▒▓███▀▒░██▓ ▒██▒░▒████▒▒██████▒▒
# ▒██▒ ░ ░ ▒░▒░▒░ ▒ ▒▓▒ ▒ ░ ▒ ░░ ░▒ ▒ ░ ▒▓ ░▒▓░░░ ▒░ ░▒ ▒▓▒ ▒ ░
# ▒▓▒░ ░ ▒ ▒░ ░ ░▒ ░ ░ ░ ░ ░ ░▒ ░ ▒░ ░ ░ ░░ ░▒ ░ ░
# ░▒ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░░ ░ ░ ░ ░ ░
# ░░ ░ ░T̷ O̷ G̷ E̷ T̷ H̷ ░ R̷. ░ ░ ░ ░ ░
#
#─── frag, or weat,and locate modules:── .
DB=db
#
DB_HOST=127.0.0.1
DB_PORT=5432
# R E A L T I G H T.
DB_USER=postgres
DB_PASS=¿SECRET? # <--- enter your own Postgres password
# Y E A H . . .
DB_SSH=100.64.64.15
# . . . 𝙹 𝚄 𝚂 𝚃 𝙻 𝙸 𝙺 𝙴 𝚃 𝙷 𝙰 𝚃.
DB_SSH_USER=sij
DB_SSH_PASS=¿SECRET? # <--- enter SSH password for pg server (if not localhost)
#─── notes: ────────────────────────────────────────────────── S E E ? 𝕰 𝖅 - 𝕻 𝖅
#
# DB, DB_HOST, DB_PORT, DB_USER, and DB_PASS should specify those respective
# credentials for your Postgres database. DB_SSH and associated _USER and _PASS
# variables allow database access over an SSH tunnel.
#
# In the current implementation, we rely on Postgres to hold:
# i. user-logged location data (locate module), and
# ii. results from past weather forecast checks (weather module).
#
# A future version will hopefully make use of PostGIS's geocoding capabilities,
# and add a vector database for the LLM module. Until then it's up to you if the
# locate and weather modules are worth the hassle of maintaining Postgres.
# ──────────
#
#─────────────────────────────── 𝐼 𝐵 𝐸 𝑇 𝑌 𝑂 𝑈 ─────────────────────────────────
# 𝑅 𝐸 𝐶 𝐸 𝐼 𝑉 𝐸 𝐴 𝐿 𝑂 𝑇 𝑂 𝐹 𝐿 𝑂 𝑉 𝐸 𝐿 𝐸 𝑇 𝑇 𝐸 𝑅 𝑆 𝑂 𝑉 𝐸 𝑅
#
# .----------------. .----------------. .----------------. .----------------.
# | .--------------. | .--------------. | .--------------. | .--------------. |
# | | _____ | | | ____ ____ | | | __ | | | ______ | |
# | | |_ _| | | ||_ \ / _|| | | / \ | | | |_ __ \ | |
# | | | | | | | | \/ | | | | / /\ \ | | | | |__) | | |
# | | | | | | | | |\ /| | | | | / ____ \ | | | | ___/ | |
# | | _| |_ | | | _| |_\/_| |_ | | | _/ / \ \_ | | | _| |_ | |
# | | |_____| | | ||_____||_____|| | ||____| |____|| | | |_____| | |
# | | | | | | | | | | | | |
# | '--------------' | '--------------' | '--------------' | '--------------' |
# '----------------' '----------------' '----------------' '----------------'
#
# 𝙴 𝙼 𝙰 𝙸 𝙻
#
#─── imap & smtp: ────────────────────────────────────────────────────────────────────────
IMAP_HOST=127.0.0.1
IMAP_EMAIL=¿SECRET? # <--- enter yours
IMAP_PASSWORD=¿SECRET? # <--- enter yours
IMAP_PORT=1142
IMAP_ENCRYPTION=STARTTLS
SMTP_PORT=1024
SMTP_ENCRYPTION=SSL
AUTORESPONSE_WHITELIST=¿SECRET? # <--- enter complete/fragmented emails, or keywords
AUTORESPONSE_BLACKLIST=¿SECRET? # <--- same deal-io
AUTORESPONSE_CONTEXT=¿SECRET? # <--- inform the LLM why it's auto-responding for you
USER_FULLNAME=¿SECRET? # <--- more context for the LLM
USER_BIO=¿SECRET? # <--- yet more context for the nosy LLM
#─── notes: ───────────────────────────────────────────────────────────────────────────────
#
# This is primarily for summarizing incoming emails. Any IMAP account should work, but
# I focused testing on a somewhat complex setup involving Protonmail Bridge.
#
# ──────────
#
#
#─── ms365 (calendars): ──────────────────────────────────────────────────────────────
ICAL_TOGGLE=True
ICALENDARS='E68FE085-2ECA-4097-AF0A-8D38C404D8DA,AB5A0473-16DD-4916-BD6D-F12AC2455285'
MS365_TOGGLE=False
MS365_CLIENT_ID=¿SECRET? # <--- enter your client ID (found in Azure pane)
MS365_TENANT_ID=¿SECRET? # <--- enter your tenant ID (found in Azure pane)
MS365_SECRET=¿SECRET? # <--- enter your app secret (found in Azure pane)
MS365_SCOPE='basic,calendar_all,Calendars.Read,Calendars.ReadWrite,offline_access'
MS365_TOKEN_FILE=oauth_token.txt
MS365_LOGIN_URL='https://login.microsoftonline.com'
MS365_REDIRECT_PATH=¿SECRET? # <--- e.g. http://localhost:4444/o365/oauth_redirect
#─── notes: ───────────────────────────────────────────────────────────────────────────────
#
# MS365_CLIENT_ID, _TENANT_ID, _SECRET, and _SCOPE must be obtained from Microsoft
# via the Azure portal, by creating a new app registration and an accompanying secret.
# MS365_THUMBPRINT is a vestige of an earlier failed attempt to get this working, and
# for now is deprecated. I recommend seeking out a well-reviewed tutorial for
# creating an app on Azure with a client_id and secret and necessary scopes for
# individual calendar access, because I had one heck of a time trying various approaches.
# Do better, Microsoft.
#
# ──────────
#
#
#──────────────────── L E T ' S G E T S I L L Y , ─────────────────────────────
# T H E N G O B͎̝̪̼͉͜ O͖͕͇͚͉̼ N̢̦͖̺͔͎ K̠͓̠͖͜ E̝̼̫̙͔̞ R̡͇͖̙͉͎ S̡͉̠͎͙̪
# W I T H O U R O W N
#
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓██████▒▓██████▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓████████▓▒ ░▒▓████████▓▒ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
#
#
# ( F O R R E A L T H O U G H , T H E S E A R E
#
#─── via ollama (llm & summarization): ─────── S̝͖̦͓̪̻ O̡͖̘̫͇̟ H̢͔͔̫͉͜ O̢̢͉̞͍̘ T̟͍͍̪̦̞ R I G H T N O W
LLM_URL=http://localhost:11434
SYSTEM_MSG=You are a helpful AI assistant.
DEFAULT_LLM=dolphin-mistral
DEFAULT_VISION=llava-llama3
OPENAI_API_KEY=¿SECRET? # <--- not presently implemented for anything
SUMMARY_MODEL=dolphin-mistral
SUMMARY_CHUNK_SIZE=4000
SUMMARY_CHUNK_OVERLAP=100
SUMMARY_TPW=1.3
SUMMARY_LENGTH_RATIO=4
SUMMARY_MIN_LENGTH=150
SUMMARY_TOKEN_LIMIT=4096
SUMMARY_INSTRUCT='You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.'
SUMMARY_INSTRUCT_TTS='You are an AI assistant that summarizes emails -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary. Your response will undergo Text-To-Speech conversion and added to Sanjays private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following.'
DEFAULT_VOICE=joanne
WHISPER_CPP_DIR='whisper.cpp'
WHISPER_CPP_MODELS=tiny,base,base-en,small,medium,medium-en,large-v3
WEBCLIPPER_TTS=elevenlabs
EMAIL_SUMMARY_TTS=local
YEAR_FMT="%Y"
MONTH_FMT="%Y-%m %B"
DAY_FMT="%Y-%m-%d %A"
DAY_SHORT_FMT="%Y-%m-%d"
#─── notes: ──────────────────────────────────────────────────────────────────────────────
#
# The exact values here will depend on what software you are using to run inference on an LLM,
# and of course what models and capabilities are available through it. The script was
# designed for use with `ollama`, but most of the functionality should work equally well with
# LM Studio, LocalAI, etc.
#
# DEFAULT_LLM is self-explanatory; DEFAULT_VISION is used for image recognition within
# a multimodal chat context, such as on the ig module for generating intelligible
# comments to Instagram posts, or more realistic captions for sd-generated images.
#
# Note it's possible to specify a separate model for general purposes and for
# summarization tasks. The other SUMMARY_ variables call for some explanation,
# in particular six that are most relevant when summarizing very long documents:
#
# SUMMARY_CHUNK_SIZE: determines the maximum length, in tokens, of the pieces that are
# split and sent individually to the model.
#
# SUMMARY_CHUNK_OVERLAP: determines how much of each chunk is overlapped with the prior
# and next chunks. Setting it too high causes repetition; setting
# it too low loses context and yields poor summary results.
# The summarization algorithm is flawed but I've gotten the best
# results with this set around 100–200.
#
# SUMMARY_TPW: used in estimating the token count of a prompt for purposes of
# complying with the maximum tokens a model can handle at once.
# Best you can do is estimate. I tend to use long words a fair
# bit excessively and found my average was 1.3 tokens per word. YMMV.
#
# SUMMARY_LENGTH_RATIO: this is the primary control over the length of generated
# summaries, expressed as the ratio of original text length to
# summary length. The default, 4, means the summaries will be
# around 1/4 the length of the original text you provide it.
#
# SUMMARY_MIN_LENGTH: the default SUMMARY_LENGTH_RATIO of 4 isn't ideal for very
# short texts, but setting it any lower sacrifices conciseness
# in summaries of longer texts. In short one size doesn't fit
# all. The compromise I landed on was to set a "maximum minimum"
# summary length: under no circumstances will the script impose
# a smaller maximum length than this value.
#
# SUMMARY_INSTRUCT: sets the prompt used when summarizing text.
#
# SUMMARY_INSTRUCT_TTS: sets a separate prompt for use when summarizing text where
# tts output was requested; tends to yield "cleaner" audio
# with less numbers (page numbers, citations) and other
# information extraneous to spoken contexts.
#
# DEFAULT_VOICE: used for all tts tasks when a specific voice is not requested.
#
# ──────────
#
#
#────,-_/────────── W E C A N E X P E R I M E N T W I T H ──────────.───────────
# ' | ,~-,-. ,-. ,-. ,--. | --' ,--. ,-. ,--. ,-. ,-. |-- . ,-. ,-.
# .^ | | | | ,--| | | | --' | -,- | --' | | | --' | ,--| | | | | | |
# `--' ' ' ' `-^ `-| `--' `---| `--' ' ' `--' ' `--^ `' ` `-' ' '
# , | ,-. | ~ 𝙸 𝙽 𝚃 𝙷 𝙴 𝙽 𝚄 𝙳 𝙴 . ~
# `~~' `-+'
# O R F U L L Y C L O T H E D ── U P T O Y O U
#
#─── via comfyui (stable diffusion): ───── ( B U T L E T M E K N O W , Y E A H ? )
COMFYUI_URL=http://localhost:8188
COMFYUI_DIR=/Users/sij/workshop/ComfyUI
COMFYUI_LAUNCH_CMD="mamba activate comfyui && python main.py"
OBSIDIAN_BANNER_SCENE=wallpaper
PHOTOPRISM_USER=NOT_IMPLEMENTED
PHOTOPRISM_PASS=NOT_IMPLEMENTED
ANONYMIZED_TELEMETRY=False
#─── notes: ──────────────────────────────────────────────────────────────────────────────
#
# COMFYUI_URL, as you may expect, should point to the URL you use to access ComfyUI. If you
# don't know, watch for it in the server logs once ComfyUI is fully launched.
#
# COMFYUI_DIR, with similar self-evidence, should point to the base directory of your
# ComfyUI installation (i.e. the folder that contains `models`, `inputs`, and `outputs`).
# It can handle either a
#
# PhotoPrism integration is not yet implemented, so don't bother with that just yet.
# ──────────
#
# D O N ' T M I S S O N E ───────────────────────────────────────
#\ F I N A L S M A T T E R I N G O F Ⓜ Ⓘ Ⓢ Ⓒ Ⓔ Ⓛ Ⓛ Ⓐ Ⓝ Ⓨ \
# \ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ _/\\\\_ _ _ _ _ _ /\\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ \/\\\\\\_ _ _ _ /\\\\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ \/\\\//\\\_ _ /\\\//\\\ _ _/\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ \/\\\\///\\\/\\\/ \/\\\ _ _///_ _ _/\\\\\\\\\\_ _ _ _/\\\\\\\\_ _\
# \ _ _ \/\\\ _\///\\\/ _ \/\\\ _ _/\\\ _ \/\\\////// _ _ _/\\\////// _ _\
# \ _ _ \/\\\ _ _\/// _ _ \/\\\ _ _/\\\ _ \/\\\\\\\\\\_ _ /\\\_ _ _ _ _ _\
# \ _ _ \/\\\ _ _ _ _ _ _ \/\\\ _ _/\\\ _ \////////\\\_ _\//\\\ _ _ _ _ _\
# \ _ _ \/\\\ _ _ _ _ _ _ \/\\\ _ _/\\\ _ _/\\\\\\\\\\_ _ \///\\\\\\\\_ _\
# \ _ _ \///_ _ _ _ _ _ _ \///_ _ _///_ _ \////////// _ _ _ \//////// _ _\
# \ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# ─────────────────── A N D O T H E R W H A T - H A V E - Y O U S ──
#
#─── other needful API keys, mainly: ────────────────────────────────────────────────────
CF_API_BASE_URL=¿SECRET? # <--- Cloudflare API URL
CF_TOKEN=¿SECRET? # <--- Cloudflare Token
VISUALCROSSING_BASE_URL='https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline'
VISUALCROSSING_API_KEY=¿SECRET? # <--- VisualCrossing API key (for Weather)
ELEVENLABS_API_KEY=¿SECRET? # <--- ElevenLabs API key (for TTS)
COURTLISTENER_BASE_URL='https://www.courtlistener.com'
COURTLISTENER_API_KEY=¿SECRET? # <--- CourtListener API key (for court docket entries)
TIMING_API_URL='https://web.timingapp.com/api/v1'
TIMING_API_KEY=¿SECRET? # <--- API key for macOS/web app Timing (time tracking)
PUBLIC_KEY_FILE=sij.asc # <--- public PGP key (served at /pgp)
MAC_ID=¿SECRET? # <--- Tailscale hostname for primary macOS (alerts)
MAC_UN=¿SECRET? # <--- Primary macOS username
MAC_PW=¿SECRET? # <--- Primary macOS password
#─── notes: ──────────────────────────────────────────────────────────────────────────────
#
#
# CF_TOKEN: a Cloudflare token. This is used on the cf router for quick
# deployment of new domains in tandem with Caddy and for ddns.
#
# VISUALCROSSING_API_KEY: used for obtaining weather forecasts. It is a very data-rich
# yet affordable source of weather info, with a generous free
# plan.
#
# ELEVENLABS_API_KEY: used when on the tts router if tts tasks are outsourced to
# the state-of-the-art models at ElevenLabs.
#
# COURTLISTENER_API_KEY: used primarily on the hooks router, but likely relevant only
# to legal professionals that will be aware what it is for.
#
# TIMING_API_URL: are used on the time router for generating various tasks
# & related to timekeeping, as well as on the notes router for
# TIMING_API_KEY: generating markdown-formatted timeslips. It requires an
# active subscription to the Timing app (macOS or web), but
# it's worth noting comes included in the SetApp subscription
# bundle, for the same price, last I checked, as subscribing to
# Timing alone. If you have a Mac and somehow don't know this
# already, SetApp is an utterly insane value. I pay $15/mo for
# apps that I would otherwise pay ~$100/mo for if subscribing
# individually. I want to say I wasn't paid to say this, but
# with those savings I almost feel like I was.
#
# MAC_ID: These last three variables are for a specific use case where
# MAC_UN: you want certain commands run, or alerts appearing, on a
# MAC_PW: designated macOS computer. The alerts router is designed to
# deliver OS-level notifications to the specified Mac when a
# webhook gets a hit on specified keywords within the payload.
# Setting the MAC_ID to the TS_ID of the target Mac, allows
# the script to readily know whether it itself is the target
# (this is relevant in a load-balancing context), and how to
# reach the target if not — to wit, ssh using MagicDNS.

98
sijapi/config/config.py Normal file
View file

@ -0,0 +1,98 @@
import os
import yaml
from time import sleep
from pathlib import Path
import ipaddress
import yaml
class Config:
    """Attribute-style view over a YAML configuration file.

    Top-level keys of the parsed file are exposed as attributes. Values that
    are dicts are wrapped in ConfigSection so nested keys can be reached with
    further dotted access.
    """

    def __init__(self, yaml_file):
        """Parse `yaml_file` with `yaml.safe_load` and keep the result in `self.data`."""
        with open(yaml_file, 'r') as file:
            # An empty document parses to None; fall back to an empty mapping
            # so later lookups raise AttributeError instead of TypeError.
            self.data = yaml.safe_load(file) or {}

    def __getattr__(self, name):
        """Return the top-level config value stored under `name`.

        Raises:
            AttributeError: if `name` is not a key of the loaded data.
        """
        # __getattr__ only runs after normal lookup failed. Reading
        # `self.data` here would re-enter __getattr__ forever whenever `data`
        # itself is missing (an instance built without __init__), so the
        # instance dict is consulted directly.
        data = self.__dict__.get('data')
        if data is not None and name in data:
            value = data[name]
            if isinstance(value, dict):
                return ConfigSection(value)
            return value
        raise AttributeError(f"Config has no attribute '{name}'")
class ConfigSection:
    """Attribute-style wrapper around one nested mapping of the configuration.

    Reading an attribute looks the name up in the wrapped dict (nested dicts
    are wrapped again). Assigning any attribute other than `data` writes the
    value into the wrapped dict.
    """

    def __init__(self, data):
        """Wrap `data`; the dict is kept by reference, not copied."""
        # Routed through __setattr__, which stores `data` as a real attribute.
        self.data = data

    def __getattr__(self, name):
        """Return the value stored under `name` in the wrapped dict.

        Raises:
            AttributeError: if `name` is not a key of the wrapped dict.
        """
        # Consult the instance dict directly: going through `self.data` would
        # recurse without end when `data` has not been set (an instance built
        # without __init__).
        data = self.__dict__.get('data')
        if data is not None and name in data:
            value = data[name]
            if isinstance(value, dict):
                return ConfigSection(value)
            return value
        raise AttributeError(f"ConfigSection has no attribute '{name}'")

    def __setattr__(self, name, value):
        """Store `data` on the instance; send every other name into the dict."""
        if name == 'data':
            super().__setattr__(name, value)
        else:
            self.data[name] = value
# Load the YAML configuration file. The path is relative, so it is resolved
# against the current working directory, and this runs at import time.
CFG = Config('.config.yaml')
# Example of attribute-style access: prints the PORT entry of the API section
# every time the module is imported.
print(CFG.API.PORT) # NOTE(review): this comment used to read "Output: localhost", which looks wrong for a port — confirm the expected value
def load_config():
    """Create the runtime directories and load `.config.yaml`.

    The configuration file is looked up in the directory that contains this
    module. The `data`, `data/alerts`, `logs`, `logs/requests`, `data/docs`
    and `data/sd/images` directories are created under the directory two
    levels above this module if they do not exist yet.

    Returns:
        A Config built from the file, or None when loading fails (the error
        is printed).
    """
    yaml_file = os.path.join(os.path.dirname(__file__), ".config.yaml")

    base_dir = Path(__file__).resolve().parent.parent
    data_dir = base_dir / "data"
    logs_dir = base_dir / "logs"
    # Same directories, created in the same order as before.
    for directory in (
        data_dir,
        data_dir / "alerts",
        logs_dir,
        logs_dir / "requests",
        data_dir / "docs",
        data_dir / "sd" / "images",
    ):
        os.makedirs(directory, exist_ok=True)

    try:
        # Config opens and parses the file itself. The earlier code parsed the
        # file here and handed the resulting dict to Config, which then called
        # open() on a dict; the TypeError was swallowed below, so this
        # function always returned None.
        return Config(yaml_file)
    except Exception as e:
        # Deliberately broad: callers get None rather than an exception.
        print(f"Error while loading configuration: {e}")
        return None
def reload_config():
    """Refresh the module-level `config` from `.config.yaml` every five minutes.

    This function never returns: it loops forever and blocks its caller in
    `sleep` between reloads. Errors raised while loading propagate.
    """
    global config
    # Same file load_config() reads. The earlier code opened 'config.yaml'
    # (no leading dot) and passed the parsed dict to Config, which expects a
    # path, so the first iteration raised TypeError.
    yaml_file = os.path.join(os.path.dirname(__file__), ".config.yaml")
    while True:
        config = Config(yaml_file)
        sleep(300)  # reload every 5 minutes

View file

@ -0,0 +1,151 @@
{
"Alpaca": {
"models": [
"mythomax",
"openhermes",
"deepseek"
],
"prefix": "\n### Instruction:\n",
"stops": [
"### Instruction"
],
"suffix": "\n### Response:\n",
"sysPrefix": "### System\n",
"sysSuffix": "\n"
},
"Amazon": {
"models": [
"mistrallite"
],
"prefix": "<|prompter|>",
"stops": [
"<|prompter|>",
"</s>"
],
"suffix": "</s><|assistant|>",
"sysPrefix": "",
"sysSuffix": ""
},
"ChatML": {
"models": [
"dolphin",
"capybara",
"nous-hermes-2"
],
"prefix": "<|im_end|>\n<|im_start|>user\n",
"stops": [
"<|im_end|>",
"<|im_start|>"
],
"suffix": "<|im_end|>\n<|im_start|>assistant\n",
"sysPrefix": "<|im_start|>system\n",
"sysSuffix": "<|im_end|>"
},
"Llama2": {
"models": [
"llama2-placeholder"
],
"prefix": "\n\n[INST] ",
"stops": [
"[/INST]",
"[INST]"
],
"suffix": "[/INST]\n\n",
"sysPrefix": "",
"sysSuffix": "\n\n"
},
"Mistral": {
"models": [
"mistral-instruct",
"mixtral-8x7b-instruct"
],
"prefix": "\n[INST] ",
"stops": [
"[/INST]",
"[INST]",
"</s>"
],
"suffix": "[/INST]\n",
"sysPrefix": "",
"sysSuffix": "\n<s>"
},
"Orca": {
"models": [
"upstage",
"neural",
"solar",
"SOLAR"
],
"prefix": "\n### User:\n",
"stops": [
"###",
"User:"
],
"suffix": "\n### Assistant:\n",
"sysPrefix": "### System:\n",
"sysSuffix": "\n"
},
"Phi2": {
"models": [
"phi-2"
],
"prefix": "\nSangye: ",
"stops": [
"###",
"User Message"
],
"suffix": "\nAssistant: ",
"sysPrefix": "System: ",
"sysSuffix": "\n"
},
"Phind": {
"models": [
"phind"
],
"prefix": "\n### User Message\n",
"stops": [
"###",
"User Message"
],
"suffix": "\n### Assistant\n",
"sysPrefix": "### System Prompt\n",
"sysSuffix": "\n"
},
"Vicuna": {
"models": [
"xwin",
"synthia",
"tess"
],
"prefix": "\nUSER: ",
"stops": [
"</s>",
"USER:",
"SYSTEM:"
],
"suffix": "</s>\nASSISTANT: ",
"sysPrefix": "SYSTEM: ",
"sysSuffix": "\n"
},
"Zephyr": {
"models": [
"zephyr"
],
"prefix": " ",
"stops": [
"</s>"
],
"suffix": "</s>\n ",
"sysPrefix": " ",
"sysSuffix": "</s>\n"
},
"default": {
"prefix": "\n### Instruction:\n",
"stops": [
"### Instruction"
],
"suffix": "\n### Response:\n",
"sysPrefix": "### System\n",
"sysSuffix": "\n"
}
}

View file

@ -0,0 +1,21 @@
[loggers]
keys=root
[handlers]
keys=consoleHandler
[formatters]
keys=consoleFormatter
[logger_root]
level=DEBUG
handlers=consoleHandler
[handler_consoleHandler]
class=StreamHandler
level=DEBUG
formatter=consoleFormatter
args=(sys.stdout,)
[formatter_consoleFormatter]
format=%(asctime)s %(name)s %(levelname)s %(message)s

View file

@ -0,0 +1,43 @@
{
"scenes": [
{
"scene": "default",
"triggers": [""],
"API_PPrompt": "(Highly-detailed) image of ",
"API_SPrompt": "; ((masterpiece)); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
"API_NPrompt": "oil, paint splash, oil effect, dots, paint, freckles, liquid effect, canvas frame, 3d, bad art, asian, illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, explicit, topless",
"llm_sys_msg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
"llm_pre_prompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this scene description to its essence, staying true to what it describes: ",
"workflows": [{"workflow": "turbo.json", "size": "1024x768"}]
},
{
"scene": "portrait",
"triggers": [
"portrait",
"profile",
"headshot"
],
"API_PPrompt": "Highly-detailed portrait photo of ",
"API_SPrompt": "; attractive, cute, (((masterpiece))); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
"API_NPrompt": "canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, nude",
"llm_sys_msg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic portrait photos. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided, focusing in particular on the pictured individual's eyes, pose, and other distinctive features. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the rest of the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words. Remember that the final product will be a still image, and action verbs are not as helpful as simple descriptions of position, appearance, background, etc.",
"llm_pre_prompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this portrait photo to its essence: ",
"workflows": [
{
"workflow": "selfie.json",
"size": "768x1024"
}
]
},
{
"scene": "wallpaper",
"triggers": ["wallpaper"],
"API_PPrompt": "Stunning widescreen image of ",
"API_SPrompt": ", masterpiece, (subtle:0.7), (nuanced:0.6), best quality, ultra detailed, ultra high resolution, 8k, (documentary:0.3), cinematic, filmic, moody, dynamic lighting, realistic, wallpaper, landscape photography, professional, earthporn, (eliot porter:0.6), (frans lanting:0.4), (daniel kordan:0.6), landscapephotography, ultra detailed, earth tones, moody",
"API_NPrompt": "FastNegativeV2, (easynegative:0.5), canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, Photoshop, video game, anime, cartoon, fake, tiling, out of frame, bad art, bad anatomy, 3d render, nsfw, worst quality, low quality, text, watermark, (Thomas Kinkade:0.5), sentimental, kitsch, kitschy, twee, commercial, holiday card, modern, futuristic, urban, comic, cartoon, FastNegativeV2, epiCNegative, easynegative, verybadimagenegative_v1.3",
"llm_sys_msg": "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
"llm_pre_prompt": "Using a series of words or sentence fragments separated by commas, describe a professional landscape photograph of a striking scene of nature. You can select any place on Earth that a young model from the Pacific Northwest is likely to travel to. Focus on describing the content and composition of the image. Only use words and phrases that are visually descriptive. This model is especially fond of wild and rugged places, mountains. She favors dark muted earth tones, dramatic lighting, and interesting juxtapositions between foreground and background, or center of frame and outer frame areas. Avoid cliche situations; instead strive for nuance and originality in composition and environment.",
"workflows": [{"workflow": "landscape.json", "size": "1160x768"}]
}
]
}

74
sijapi/config/sd.json Normal file
View file

@ -0,0 +1,74 @@
{
"scenes": [
{
"API_NPrompt": "oil, paint splash, oil effect, dots, paint, freckles, liquid effect, canvas frame, 3d, bad art, asian, illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, explicit, topless",
"API_PPrompt": "(Highly-detailed) image of ",
"API_SPrompt": "; ((masterpiece)); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
"llm_pre_prompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this scene description to its essence, staying true to what it describes: ",
"llm_sys_msg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic images. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
"scene": "default",
"triggers": [
""
],
"workflows": [
{
"size": "1024x768",
"workflow": "turbo.json"
}
]
},
{
"API_NPrompt": "FastNegativeV2, easynegative, canvas frame, 3d, bad art, illustrated, deformed, blurry, duplicate, Photoshop, video game, anime, cartoon, fake, tiling, out of frame, bad art, bad anatomy, 3d render, nsfw, worst quality, low quality, text, watermark, (Thomas Kinkade:0.5), sentimental, kitsch, kitschy, twee, commercial, holiday card, modern, futuristic, urban, comic, cartoon, FastNegativeV2, epiCNegative, easynegative, verybadimagenegative_v1.3",
"API_PPrompt": "Stunning widescreen image of ",
"API_SPrompt": ", masterpiece, subtle, nuanced, best quality, ultra detailed, ultra high resolution, 8k, documentary, cinematic, filmic, moody, dynamic lighting, realistic, wallpaper, landscape photography, professional, earthporn, eliot porter, frans lanting, daniel kordan, landscapephotography, ultra detailed, earth tones, moody",
"llm_pre_prompt": "Using a series of words or sentence fragments separated by commas, describe a professional landscape photograph of a striking scene of nature. You can select any place on Earth that a young model from the Pacific Northwest is likely to travel to. Focus on describing the content and composition of the image. Only use words and phrases that are visually descriptive. This model is especially fond of wild and rugged places, mountains. She favors dark muted earth tones, dramatic lighting, and interesting juxtapositions between foreground and background, or center of frame and outer frame areas. Avoid cliche situations; instead strive for nuance and originality in composition and environment.",
"llm_sys_msg": "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
"scene": "landscape",
"triggers": [
"landscape"
],
"workflows": [
{
"size": "1160x768",
"workflow": "landscape.json"
}
]
},
{
"API_NPrompt": "FastNegativeV2, (easynegative:0.5), canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, Photoshop, video game, anime, cartoon, fake, tiling, out of frame, bad art, bad anatomy, 3d render, nsfw, worst quality, low quality, text, watermark, (Thomas Kinkade:0.5), sentimental, kitsch, kitschy, twee, commercial, holiday card, modern, futuristic, urban, comic, cartoon, FastNegativeV2, epiCNegative, easynegative, verybadimagenegative_v1.3",
"API_PPrompt": "Stunning widescreen image of ",
"API_SPrompt": ", masterpiece, (subtle:0.7), (nuanced:0.6), best quality, ultra detailed, ultra high resolution, 8k, (documentary:0.3), cinematic, filmic, moody, dynamic lighting, realistic, wallpaper, landscape photography, professional, earthporn, (eliot porter:0.6), (frans lanting:0.4), (daniel kordan:0.6), landscapephotography, ultra detailed, earth tones, moody",
"llm_pre_prompt": "Using a series of words or sentence fragments separated by commas, describe a professional landscape photograph of a striking scene of nature. You can select any place on Earth that a young model from the Pacific Northwest is likely to travel to. Focus on describing the content and composition of the image. Only use words and phrases that are visually descriptive. This model is especially fond of wild and rugged places, mountains. She favors dark muted earth tones, dramatic lighting, and interesting juxtapositions between foreground and background, or center of frame and outer frame areas. Avoid cliche situations; instead strive for nuance and originality in composition and environment.",
"llm_sys_msg": "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words.",
"scene": "wallpaper",
"triggers": [
"wallpaper"
],
"workflows": [
{
"size": "1080x512",
"workflow": "wallpaper.json"
}
]
},
{
"API_NPrompt": "canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3, nsfw, nude",
"API_PPrompt": "Highly-detailed portrait photo of ",
"API_SPrompt": "; attractive, cute, (((masterpiece))); ((beautiful lighting)), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
"llm_pre_prompt": "Using the most visually descriptive sentence fragments, phrases, and words, distill this portrait photo to its essence: ",
"llm_sys_msg": "You are a helpful AI who assists in refining prompts that will be used to generate highly realistic portrait photos. Upon receiving a prompt, you refine it by simplifying and distilling it to its essence, retaining the most visually evocative and distinct elements from what was provided, focusing in particular on the pictured individual's eyes, pose, and other distinctive features. You may infer some visual details that were not provided in the prompt, so long as they are consistent with the rest of the prompt. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words. Remember that the final product will be a still image, and action verbs are not as helpful as simple descriptions of position, appearance, background, etc.",
"scene": "portrait",
"triggers": [
"portrait",
"profile",
"headshot"
],
"workflows": [
{
"size": "768x1024",
"workflow": "selfie.json"
}
]
}
]
}

View file

@ -0,0 +1,8 @@
[
{
"name": "Echo Valley Ranch",
"latitude": 42.8098216,
"longitude": -123.049396,
"radius": 1.5
}
]

View file

@ -0,0 +1,220 @@
{
"4": {
"inputs": {
"ckpt_name": "Other/dreamshaperXL_v21TurboDPMSDE.safetensors"
},
"class_type": "CheckpointLoaderSimple",
"_meta": {
"title": "Load Checkpoint"
}
},
"6": {
"inputs": {
"text": "API_PPrompt",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"7": {
"inputs": {
"text": "API_NPrompt",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"8": {
"inputs": {
"samples": [
"16",
0
],
"vae": [
"4",
2
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"9": {
"inputs": {
"filename_prefix": "API_",
"images": [
"8",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
},
"10": {
"inputs": {
"text": "API_SPrompt",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"14": {
"inputs": {
"conditioning_1": [
"6",
0
],
"conditioning_2": [
"10",
0
]
},
"class_type": "ConditioningCombine",
"_meta": {
"title": "Conditioning (Combine)"
}
},
"15": {
"inputs": {
"batch_size": 1,
"width": 1023,
"height": 1025,
"resampling": "nearest-exact",
"X": 0,
"Y": 0,
"Z": 0,
"evolution": 0,
"frame": 0,
"scale": 5,
"octaves": 8,
"persistence": 1.5,
"lacunarity": 2,
"exponent": 4,
"brightness": 0,
"contrast": 0,
"clamp_min": 0,
"clamp_max": 1,
"seed": 648867523029843,
"device": "cpu",
"optional_vae": [
"4",
2
],
"ppf_settings": [
"17",
0
]
},
"class_type": "Perlin Power Fractal Latent (PPF Noise)",
"_meta": {
"title": "Perlin Power Fractal Noise 🦚"
}
},
"16": {
"inputs": {
"seed": 863091325074880,
"steps": 10,
"cfg": 8,
"sampler_name": "dpmpp_2m_sde",
"scheduler": "karras",
"start_at_step": 0,
"end_at_step": 10000,
"enable_denoise": "false",
"denoise": 1,
"add_noise": "enable",
"return_with_leftover_noise": "disable",
"noise_type": "brownian_fractal",
"noise_blending": "cuberp",
"noise_mode": "additive",
"scale": 1,
"alpha_exponent": 1,
"modulator": 1,
"sigma_tolerance": 0.5,
"boost_leading_sigma": "false",
"guide_use_noise": "true",
"model": [
"4",
0
],
"positive": [
"14",
0
],
"negative": [
"7",
0
],
"latent_image": [
"15",
0
],
"ppf_settings": [
"17",
0
],
"ch_settings": [
"18",
0
]
},
"class_type": "Power KSampler Advanced (PPF Noise)",
"_meta": {
"title": "Power KSampler Advanced 🦚"
}
},
"17": {
"inputs": {
"X": 0,
"Y": 0,
"Z": 0,
"evolution": 0,
"frame": 0,
"scale": 5,
"octaves": 8,
"persistence": 1.5,
"lacunarity": 2,
"exponent": 4,
"brightness": 0,
"contrast": 0
},
"class_type": "Perlin Power Fractal Settings (PPF Noise)",
"_meta": {
"title": "Perlin Power Fractal Settings 🦚"
}
},
"18": {
"inputs": {
"frequency": 320,
"octaves": 12,
"persistence": 1.5,
"num_colors": 16,
"color_tolerance": 0.05,
"angle_degrees": 45,
"brightness": 0,
"contrast": 0,
"blur": 2.5
},
"class_type": "Cross-Hatch Power Fractal Settings (PPF Noise)",
"_meta": {
"title": "Cross-Hatch Power Fractal Settings 🦚"
}
}
}

View file

@ -0,0 +1,298 @@
{
"10": {
"_meta": {
"title": "Power KSampler Advanced 🦚"
},
"class_type": "Power KSampler Advanced (PPF Noise)",
"inputs": {
"add_noise": "enable",
"alpha_exponent": 1,
"boost_leading_sigma": "false",
"cfg": 4.5,
"ch_settings": [
"12",
0
],
"denoise": 1,
"enable_denoise": "false",
"end_at_step": 10000,
"guide_use_noise": "true",
"latent_image": [
"13",
0
],
"model": [
"4",
0
],
"modulator": 1,
"negative": [
"7",
0
],
"noise_blending": "hslerp",
"noise_mode": "additive",
"noise_type": "vanilla_comfy",
"positive": [
"6",
0
],
"ppf_settings": [
"11",
0
],
"return_with_leftover_noise": "disable",
"sampler_name": "dpmpp_2m_sde",
"scale": 1,
"scheduler": "karras",
"seed": 301923985151711,
"sigma_tolerance": 0.5,
"start_at_step": 0,
"steps": 20
}
},
"11": {
"_meta": {
"title": "Perlin Power Fractal Settings 🦚"
},
"class_type": "Perlin Power Fractal Settings (PPF Noise)",
"inputs": {
"X": 0,
"Y": 0,
"Z": 0,
"brightness": 0,
"contrast": 0,
"evolution": 0,
"exponent": 4,
"frame": 0,
"lacunarity": 2,
"octaves": 8,
"persistence": 1.5,
"scale": 5
}
},
"12": {
"_meta": {
"title": "Cross-Hatch Power Fractal Settings 🦚"
},
"class_type": "Cross-Hatch Power Fractal Settings (PPF Noise)",
"inputs": {
"angle_degrees": 45,
"blur": 2.5,
"brightness": 0,
"color_tolerance": 0.05,
"contrast": 0,
"frequency": 320,
"num_colors": 16,
"octaves": 12,
"persistence": 1.5
}
},
"13": {
"_meta": {
"title": "Perlin Power Fractal Noise 🦚"
},
"class_type": "Perlin Power Fractal Latent (PPF Noise)",
"inputs": {
"X": 0,
"Y": 0,
"Z": 0,
"batch_size": 1,
"brightness": 0,
"clamp_max": 1,
"clamp_min": 0,
"contrast": 0,
"device": "cpu",
"evolution": 0,
"exponent": 4,
"frame": 0,
"height": 1025,
"lacunarity": 2.5,
"octaves": 8,
"optional_vae": [
"4",
2
],
"persistence": 1.5,
"ppf_settings": [
"11",
0
],
"resampling": "nearest-exact",
"scale": 5,
"seed": 961984691493347,
"width": 1023
}
},
"23": {
"_meta": {
"title": "Ultimate SD Upscale"
},
"class_type": "UltimateSDUpscale",
"inputs": {
"cfg": 7.5,
"denoise": 0.32,
"force_uniform_tiles": true,
"image": [
"8",
0
],
"mask_blur": 8,
"mode_type": "Chess",
"model": [
"24",
0
],
"negative": [
"32",
0
],
"positive": [
"31",
0
],
"sampler_name": "dpmpp_2m_sde",
"scheduler": "karras",
"seam_fix_denoise": 1,
"seam_fix_mask_blur": 8,
"seam_fix_mode": "Band Pass",
"seam_fix_padding": 16,
"seam_fix_width": 64,
"seed": 221465882658451,
"steps": 16,
"tile_height": 768,
"tile_padding": 32,
"tile_width": 768,
"tiled_decode": false,
"upscale_by": 4,
"upscale_model": [
"33",
0
],
"vae": [
"24",
2
]
}
},
"24": {
"_meta": {
"title": "Load Checkpoint"
},
"class_type": "CheckpointLoaderSimple",
"inputs": {
"ckpt_name": "SD1.5/realisticVisionV60B1_v51VAE.safetensors"
}
},
"31": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"24",
1
],
"text": ""
}
},
"32": {
"_meta": {
"title": "ConditioningZeroOut"
},
"class_type": "ConditioningZeroOut",
"inputs": {
"conditioning": [
"31",
0
]
}
},
"33": {
"_meta": {
"title": "Load Upscale Model"
},
"class_type": "UpscaleModelLoader",
"inputs": {
"model_name": "4x-UltraSharp.pth"
}
},
"34": {
"_meta": {
"title": "Save Image"
},
"class_type": "SaveImage",
"inputs": {
"filename_prefix": "API_",
"images": [
"23",
0
]
}
},
"36": {
"_meta": {
"title": "Save Image"
},
"class_type": "SaveImage",
"inputs": {
"filename_prefix": "Pre_",
"images": [
"8",
0
]
}
},
"4": {
"_meta": {
"title": "Load Checkpoint"
},
"class_type": "CheckpointLoaderSimple",
"inputs": {
"ckpt_name": "Other/playgroundv2.safetensors"
}
},
"6": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"4",
1
],
"text": "API_PPrompt"
}
},
"7": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"4",
1
],
"text": "API_NPrompt"
}
},
"8": {
"_meta": {
"title": "VAE Decode"
},
"class_type": "VAEDecode",
"inputs": {
"samples": [
"10",
0
],
"vae": [
"4",
2
]
}
}
}

View file

@ -0,0 +1,456 @@
{
"11": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"12",
1
],
"text": [
"25",
0
]
}
},
"12": {
"_meta": {
"title": "Load LoRA"
},
"class_type": "LoraLoader",
"inputs": {
"clip": [
"4",
1
],
"lora_name": "SDXL/add-detail-xl.safetensors",
"model": [
"4",
0
],
"strength_clip": 0.3,
"strength_model": 0.33
}
},
"13": {
"_meta": {
"title": "Load LoRA"
},
"class_type": "LoraLoader",
"inputs": {
"clip": [
"12",
1
],
"lora_name": "SDXL/SDXLLandskaper_v1-000003.safetensors",
"model": [
"12",
0
],
"strength_clip": 0.75,
"strength_model": 0.8
}
},
"14": {
"_meta": {
"title": "Power KSampler Advanced 🦚"
},
"class_type": "Power KSampler Advanced (PPF Noise)",
"inputs": {
"add_noise": "enable",
"alpha_exponent": 1,
"boost_leading_sigma": "false",
"cfg": 8,
"ch_settings": [
"19",
0
],
"denoise": 1,
"enable_denoise": "false",
"end_at_step": 10000,
"guide_use_noise": "true",
"latent_image": [
"20",
0
],
"model": [
"13",
0
],
"modulator": 1,
"negative": [
"61",
0
],
"noise_blending": "cuberp",
"noise_mode": "additive",
"noise_type": "brownian_fractal",
"positive": [
"63",
0
],
"ppf_settings": [
"18",
0
],
"return_with_leftover_noise": "disable",
"sampler_name": "dpmpp_2m_sde",
"scale": 1,
"scheduler": "karras",
"seed": 809193506471910,
"sigma_tolerance": 0.5,
"start_at_step": 0,
"steps": 28
}
},
"18": {
"_meta": {
"title": "Perlin Power Fractal Settings 🦚"
},
"class_type": "Perlin Power Fractal Settings (PPF Noise)",
"inputs": {
"X": 0,
"Y": 0,
"Z": 0,
"brightness": 0,
"contrast": 0,
"evolution": 0.2,
"exponent": 5,
"frame": 40,
"lacunarity": 2.4,
"octaves": 8,
"persistence": 1.6,
"scale": 8
}
},
"19": {
"_meta": {
"title": "Cross-Hatch Power Fractal Settings 🦚"
},
"class_type": "Cross-Hatch Power Fractal Settings (PPF Noise)",
"inputs": {
"angle_degrees": 45,
"blur": 2.5,
"brightness": 0,
"color_tolerance": 0.05,
"contrast": 0,
"frequency": 320,
"num_colors": 32,
"octaves": 24,
"persistence": 1.5
}
},
"20": {
"_meta": {
"title": "Perlin Power Fractal Noise 🦚"
},
"class_type": "Perlin Power Fractal Latent (PPF Noise)",
"inputs": {
"X": 0,
"Y": 0,
"Z": 0,
"batch_size": 1,
"brightness": 0,
"clamp_max": 1,
"clamp_min": 0,
"contrast": 0,
"device": "cpu",
"evolution": 0.2,
"exponent": 4,
"frame": 40,
"height": [
"54",
1
],
"lacunarity": 2.4,
"octaves": 8,
"optional_vae": [
"4",
2
],
"persistence": 1.6,
"ppf_settings": [
"18",
0
],
"resampling": "nearest-exact",
"scale": 8,
"seed": 189685705390202,
"width": [
"54",
0
]
}
},
"21": {
"_meta": {
"title": "Conditioning (Combine)"
},
"class_type": "ConditioningCombine",
"inputs": {
"conditioning_1": [
"11",
0
],
"conditioning_2": [
"6",
0
]
}
},
"23": {
"_meta": {
"title": "String (Multiline)"
},
"class_type": "JWStringMultiline",
"inputs": {
"text": "API_SPrompt"
}
},
"24": {
"_meta": {
"title": "String (Multiline)"
},
"class_type": "JWStringMultiline",
"inputs": {
"text": "API_NPrompt"
}
},
"25": {
"_meta": {
"title": "String (Multiline)"
},
"class_type": "JWStringMultiline",
"inputs": {
"text": "API_PPrompt"
}
},
"28": {
"_meta": {
"title": "Tiled VAE Decode"
},
"class_type": "VAEDecodeTiled_TiledDiffusion",
"inputs": {
"fast": false,
"samples": [
"14",
0
],
"tile_size": [
"53",
0
],
"vae": [
"4",
2
]
}
},
"36": {
"_meta": {
"title": "Save Image"
},
"class_type": "SaveImage",
"inputs": {
"filename_prefix": "API_",
"images": [
"52",
0
]
}
},
"4": {
"_meta": {
"title": "Load Checkpoint"
},
"class_type": "CheckpointLoaderSimple",
"inputs": {
"ckpt_name": "SDXL/realismEngineSDXL_v20VAE.safetensors"
}
},
"42": {
"_meta": {
"title": "Upscale Model Loader"
},
"class_type": "Upscale Model Loader",
"inputs": {
"model_name": "RealESRGAN_x2plus.pth"
}
},
"52": {
"_meta": {
"title": "Ultimate SD Upscale"
},
"class_type": "UltimateSDUpscale",
"inputs": {
"cfg": 8,
"denoise": 0.24,
"force_uniform_tiles": true,
"image": [
"28",
0
],
"mask_blur": 8,
"mode_type": "Linear",
"model": [
"12",
0
],
"negative": [
"7",
0
],
"positive": [
"21",
0
],
"sampler_name": "dpmpp_2m_sde",
"scheduler": "karras",
"seam_fix_denoise": 1,
"seam_fix_mask_blur": 8,
"seam_fix_mode": "None",
"seam_fix_padding": 16,
"seam_fix_width": 64,
"seed": 1041855229054013,
"steps": 16,
"tile_height": [
"53",
0
],
"tile_padding": 32,
"tile_width": [
"53",
0
],
"tiled_decode": true,
"upscale_by": 2,
"upscale_model": [
"42",
0
],
"vae": [
"4",
2
]
}
},
"53": {
"_meta": {
"title": "Integer"
},
"class_type": "JWInteger",
"inputs": {
"value": 768
}
},
"54": {
"_meta": {
"title": "AnyAspectRatio"
},
"class_type": "AnyAspectRatio",
"inputs": {
"height_ratio": 3,
"rounding_value": 32,
"side_length": 1023,
"width_ratio": 4
}
},
"6": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"12",
1
],
"text": [
"23",
0
]
}
},
"60": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"13",
1
],
"text": [
"23",
0
]
}
},
"61": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"13",
1
],
"text": [
"24",
0
]
}
},
"62": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"13",
1
],
"text": [
"25",
0
]
}
},
"63": {
"_meta": {
"title": "Conditioning (Combine)"
},
"class_type": "ConditioningCombine",
"inputs": {
"conditioning_1": [
"62",
0
],
"conditioning_2": [
"60",
0
]
}
},
"7": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"12",
1
],
"text": [
"24",
0
]
}
}
}

View file

@ -0,0 +1,486 @@
{
"4": {
"inputs": {
"ckpt_name": "SDXL/hassansdxl_v10.safetensors"
},
"class_type": "CheckpointLoaderSimple",
"_meta": {
"title": "Load Checkpoint"
}
},
"6": {
"inputs": {
"text": [
"17",
0
],
"clip": [
"15",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"7": {
"inputs": {
"text": [
"18",
0
],
"clip": [
"15",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"12": {
"inputs": {
"lora_name": "SDXL/styleWegg.safetensors",
"strength_model": 0.3,
"strength_clip": 0.25,
"model": [
"91",
0
],
"clip": [
"91",
1
]
},
"class_type": "LoraLoader",
"_meta": {
"title": "Load LoRA"
}
},
"13": {
"inputs": {
"lora_name": "SDXL/add-detail-xl.safetensors",
"strength_model": 0.2,
"strength_clip": 0.2,
"model": [
"12",
0
],
"clip": [
"12",
1
]
},
"class_type": "LoraLoader",
"_meta": {
"title": "Load LoRA"
}
},
"14": {
"inputs": {
"lora_name": "SDXL/amazing_portraits_xl_v1b.safetensors",
"strength_model": 0.5,
"strength_clip": 0.45,
"model": [
"13",
0
],
"clip": [
"13",
1
]
},
"class_type": "LoraLoader",
"_meta": {
"title": "Load LoRA"
}
},
"15": {
"inputs": {
"lora_name": "SDXL/sd_xl_offset_example-lora_1.0.safetensors",
"strength_model": 0.2,
"strength_clip": 0.15,
"model": [
"53",
0
],
"clip": [
"53",
1
]
},
"class_type": "LoraLoader",
"_meta": {
"title": "Load LoRA"
}
},
"17": {
"inputs": {
"text": "API_PPrompt"
},
"class_type": "JWStringMultiline",
"_meta": {
"title": "String (Multiline)"
}
},
"18": {
"inputs": {
"text": "API_NPrompt"
},
"class_type": "JWStringMultiline",
"_meta": {
"title": "String (Multiline)"
}
},
"23": {
"inputs": {
"X": 0,
"Y": 0,
"Z": 0,
"evolution": 0,
"frame": 0,
"scale": 5,
"octaves": 8,
"persistence": 1.5,
"lacunarity": 2,
"exponent": 4,
"brightness": 0,
"contrast": 0
},
"class_type": "Perlin Power Fractal Settings (PPF Noise)",
"_meta": {
"title": "Perlin Power Fractal Settings 🦚"
}
},
"24": {
"inputs": {
"frequency": 320,
"octaves": 12,
"persistence": 1.5,
"num_colors": 16,
"color_tolerance": 0.05,
"angle_degrees": 45,
"brightness": 0,
"contrast": 0,
"blur": 2.5
},
"class_type": "Cross-Hatch Power Fractal Settings (PPF Noise)",
"_meta": {
"title": "Cross-Hatch Power Fractal Settings 🦚"
}
},
"37": {
"inputs": {
"seed": 923916094743956
},
"class_type": "Seed",
"_meta": {
"title": "Seed"
}
},
"38": {
"inputs": {
"batch_size": 1.3125,
"width": [
"95",
0
],
"height": [
"95",
1
],
"resampling": "nearest-exact",
"X": 0,
"Y": 0,
"Z": 0,
"evolution": 0,
"frame": 0,
"scale": 10,
"octaves": 8,
"persistence": 1.5,
"lacunarity": 3,
"exponent": 5,
"brightness": 0,
"contrast": 0,
"clamp_min": 0,
"clamp_max": 1,
"seed": [
"37",
3
],
"device": "cpu",
"optional_vae": [
"4",
2
],
"ppf_settings": [
"23",
0
]
},
"class_type": "Perlin Power Fractal Latent (PPF Noise)",
"_meta": {
"title": "Perlin Power Fractal Noise 🦚"
}
},
"43": {
"inputs": {
"seed": [
"37",
3
],
"steps": 32,
"cfg": 8.5,
"sampler_name": "dpmpp_2m_sde",
"scheduler": "karras",
"start_at_step": 0,
"end_at_step": 10000,
"enable_denoise": "false",
"denoise": 1,
"add_noise": "enable",
"return_with_leftover_noise": "disable",
"noise_type": "brownian_fractal",
"noise_blending": "cuberp",
"noise_mode": "additive",
"scale": 1,
"alpha_exponent": 1,
"modulator": 1,
"sigma_tolerance": 0.5,
"boost_leading_sigma": "false",
"guide_use_noise": "true",
"model": [
"15",
0
],
"positive": [
"98",
0
],
"negative": [
"7",
0
],
"latent_image": [
"38",
0
],
"ppf_settings": [
"23",
0
],
"ch_settings": [
"24",
0
]
},
"class_type": "Power KSampler Advanced (PPF Noise)",
"_meta": {
"title": "Power KSampler Advanced 🦚"
}
},
"44": {
"inputs": {
"samples": [
"43",
0
],
"vae": [
"4",
2
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"45": {
"inputs": {
"filename_prefix": "API_",
"images": [
"44",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
},
"53": {
"inputs": {
"lora_name": "SDXL/PerfectEyesXL.safetensors",
"strength_model": 0.5,
"strength_clip": 0.5,
"model": [
"14",
0
],
"clip": [
"14",
1
]
},
"class_type": "LoraLoader",
"_meta": {
"title": "Load LoRA"
}
},
"89": {
"inputs": {
"lora_name": "SDXL/ahxl_v1.safetensors",
"strength_model": 0.4,
"strength_clip": 0.33,
"model": [
"92",
0
],
"clip": [
"93",
0
]
},
"class_type": "LoraLoader",
"_meta": {
"title": "Load LoRA"
}
},
"90": {
"inputs": {
"lora_name": "SDXL/age.safetensors",
"strength_model": -0.8,
"strength_clip": -0.7000000000000001,
"model": [
"89",
0
],
"clip": [
"89",
1
]
},
"class_type": "LoraLoader",
"_meta": {
"title": "Load LoRA"
}
},
"91": {
"inputs": {
"lora_name": "SDXL/StokeRealV1.safetensors",
"strength_model": 0.2,
"strength_clip": 0.2,
"model": [
"90",
0
],
"clip": [
"90",
1
]
},
"class_type": "LoraLoader",
"_meta": {
"title": "Load LoRA"
}
},
"92": {
"inputs": {
"input": 0.36,
"middle": 0.5,
"out": 0.64,
"model1": [
"4",
0
],
"model2": [
"94",
0
]
},
"class_type": "ModelMergeBlocks",
"_meta": {
"title": "ModelMergeBlocks"
}
},
"93": {
"inputs": {
"ratio": 0.45,
"clip1": [
"4",
1
],
"clip2": [
"94",
1
]
},
"class_type": "CLIPMergeSimple",
"_meta": {
"title": "CLIPMergeSimple"
}
},
"94": {
"inputs": {
"ckpt_name": "SDXL/dreamshaperXL_alpha2Xl10.safetensors"
},
"class_type": "CheckpointLoaderSimple",
"_meta": {
"title": "Load Checkpoint"
}
},
"95": {
"inputs": {
"width_ratio": 5,
"height_ratio": 7,
"side_length": 1025,
"rounding_value": 64
},
"class_type": "AnyAspectRatio",
"_meta": {
"title": "AnyAspectRatio"
}
},
"96": {
"inputs": {
"text": "API_SPrompt"
},
"class_type": "JWStringMultiline",
"_meta": {
"title": "String (Multiline)"
}
},
"97": {
"inputs": {
"text": [
"96",
0
],
"clip": [
"15",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"98": {
"inputs": {
"conditioning_1": [
"6",
0
],
"conditioning_2": [
"97",
0
]
},
"class_type": "ConditioningCombine",
"_meta": {
"title": "Conditioning (Combine)"
}
}
}

View file

@ -0,0 +1,220 @@
{
"4": {
"inputs": {
"ckpt_name": "Other/dreamshaperXL_v21TurboDPMSDE.safetensors"
},
"class_type": "CheckpointLoaderSimple",
"_meta": {
"title": "Load Checkpoint"
}
},
"6": {
"inputs": {
"text": "API_PPrompt",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"7": {
"inputs": {
"text": "API_NPrompt",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"8": {
"inputs": {
"samples": [
"13",
0
],
"vae": [
"4",
2
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"9": {
"inputs": {
"filename_prefix": "API_",
"images": [
"8",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
},
"11": {
"inputs": {
"batch_size": 1,
"width": 1023,
"height": 1025,
"resampling": "nearest-exact",
"X": 0,
"Y": 0,
"Z": 0,
"evolution": 0,
"frame": 0,
"scale": 5,
"octaves": 8,
"persistence": 1.5,
"lacunarity": 2,
"exponent": 4,
"brightness": 0,
"contrast": 0,
"clamp_min": 0,
"clamp_max": 1,
"seed": 704513836266662,
"device": "cpu",
"optional_vae": [
"4",
2
],
"ppf_settings": [
"14",
0
]
},
"class_type": "Perlin Power Fractal Latent (PPF Noise)",
"_meta": {
"title": "Perlin Power Fractal Noise 🦚"
}
},
"13": {
"inputs": {
"seed": 525862638063448,
"steps": 8,
"cfg": 1.6,
"sampler_name": "dpmpp_2m_sde",
"scheduler": "karras",
"start_at_step": 0,
"end_at_step": 10000,
"enable_denoise": "false",
"denoise": 1,
"add_noise": "enable",
"return_with_leftover_noise": "disable",
"noise_type": "brownian_fractal",
"noise_blending": "cuberp",
"noise_mode": "additive",
"scale": 1,
"alpha_exponent": 1,
"modulator": 1,
"sigma_tolerance": 0.5,
"boost_leading_sigma": "false",
"guide_use_noise": "true",
"model": [
"4",
0
],
"positive": [
"20",
0
],
"negative": [
"7",
0
],
"latent_image": [
"11",
0
],
"ppf_settings": [
"14",
0
],
"ch_settings": [
"15",
0
]
},
"class_type": "Power KSampler Advanced (PPF Noise)",
"_meta": {
"title": "Power KSampler Advanced 🦚"
}
},
"14": {
"inputs": {
"X": 0,
"Y": 0,
"Z": 0,
"evolution": 0,
"frame": 0,
"scale": 5,
"octaves": 8,
"persistence": 1.5,
"lacunarity": 2,
"exponent": 4,
"brightness": 0,
"contrast": 0
},
"class_type": "Perlin Power Fractal Settings (PPF Noise)",
"_meta": {
"title": "Perlin Power Fractal Settings 🦚"
}
},
"15": {
"inputs": {
"frequency": 320,
"octaves": 12,
"persistence": 1.5,
"num_colors": 16,
"color_tolerance": 0.05,
"angle_degrees": 45,
"brightness": 0,
"contrast": 0,
"blur": 2.5
},
"class_type": "Cross-Hatch Power Fractal Settings (PPF Noise)",
"_meta": {
"title": "Cross-Hatch Power Fractal Settings 🦚"
}
},
"20": {
"inputs": {
"conditioning_1": [
"6",
0
],
"conditioning_2": [
"21",
0
]
},
"class_type": "ConditioningCombine",
"_meta": {
"title": "Conditioning (Combine)"
}
},
"21": {
"inputs": {
"text": "API_SPrompt",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
}
}

View file

@ -0,0 +1,332 @@
{
"11": {
"_meta": {
"title": "Perlin Power Fractal Noise 🦚"
},
"class_type": "Perlin Power Fractal Latent (PPF Noise)",
"inputs": {
"X": 0,
"Y": 0,
"Z": 0,
"batch_size": 1,
"brightness": 0,
"clamp_max": 1,
"clamp_min": 0,
"contrast": 0,
"device": "cpu",
"evolution": 0,
"exponent": 4,
"frame": 0,
"height": 1025,
"lacunarity": 2,
"octaves": 8,
"optional_vae": [
"4",
2
],
"persistence": 1.5,
"resampling": "nearest-exact",
"scale": 5,
"seed": 490162938389882,
"width": 1023
}
},
"13": {
"_meta": {
"title": "Power KSampler Advanced 🦚"
},
"class_type": "Power KSampler Advanced (PPF Noise)",
"inputs": {
"add_noise": "enable",
"alpha_exponent": 1,
"boost_leading_sigma": "false",
"cfg": 1.6,
"ch_settings": [
"15",
0
],
"denoise": 1,
"enable_denoise": "false",
"end_at_step": 10000,
"guide_use_noise": "true",
"latent_image": [
"11",
0
],
"model": [
"4",
0
],
"modulator": 1,
"negative": [
"7",
0
],
"noise_blending": "cuberp",
"noise_mode": "additive",
"noise_type": "brownian_fractal",
"positive": [
"20",
0
],
"ppf_settings": [
"14",
0
],
"return_with_leftover_noise": "disable",
"sampler_name": "dpmpp_2m_sde",
"scale": 1,
"scheduler": "karras",
"seed": 697312143874418,
"sigma_tolerance": 0.5,
"start_at_step": 0,
"steps": 8
}
},
"14": {
"_meta": {
"title": "Perlin Power Fractal Settings 🦚"
},
"class_type": "Perlin Power Fractal Settings (PPF Noise)",
"inputs": {
"X": 0,
"Y": 0,
"Z": 0,
"brightness": 0,
"contrast": 0,
"evolution": 0,
"exponent": 4,
"frame": 0,
"lacunarity": 2,
"octaves": 8,
"persistence": 1.5,
"scale": 5
}
},
"15": {
"_meta": {
"title": "Cross-Hatch Power Fractal Settings 🦚"
},
"class_type": "Cross-Hatch Power Fractal Settings (PPF Noise)",
"inputs": {
"angle_degrees": 45,
"blur": 2.5,
"brightness": 0,
"color_tolerance": 0.05,
"contrast": 0,
"frequency": 320,
"num_colors": 16,
"octaves": 12,
"persistence": 1.5
}
},
"20": {
"_meta": {
"title": "Conditioning (Combine)"
},
"class_type": "ConditioningCombine",
"inputs": {
"conditioning_1": [
"6",
0
],
"conditioning_2": [
"21",
0
]
}
},
"21": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"4",
1
],
"text": "API_SPrompt"
}
},
"22": {
"_meta": {
"title": "Ultimate SD Upscale"
},
"class_type": "UltimateSDUpscale",
"inputs": {
"cfg": 8,
"denoise": 0.21,
"force_uniform_tiles": true,
"image": [
"8",
0
],
"mask_blur": 8,
"mode_type": "Linear",
"model": [
"4",
0
],
"negative": [
"23",
0
],
"positive": [
"6",
0
],
"sampler_name": "euler",
"scheduler": "normal",
"seam_fix_denoise": 1,
"seam_fix_mask_blur": 8,
"seam_fix_mode": "None",
"seam_fix_padding": 16,
"seam_fix_width": 64,
"seed": 470914682435746,
"steps": 20,
"tile_height": 512,
"tile_padding": 32,
"tile_width": 512,
"tiled_decode": false,
"upscale_by": 2,
"upscale_model": [
"24",
0
],
"vae": [
"4",
2
]
}
},
"23": {
"_meta": {
"title": "ConditioningZeroOut"
},
"class_type": "ConditioningZeroOut",
"inputs": {
"conditioning": [
"7",
0
]
}
},
"24": {
"_meta": {
"title": "Load Upscale Model"
},
"class_type": "UpscaleModelLoader",
"inputs": {
"model_name": "ESRGAN_SRx4_DF2KOST_official-ff704c30.pth"
}
},
"26": {
"_meta": {
"title": "Upscale Image (using Model)"
},
"class_type": "ImageUpscaleWithModel",
"inputs": {
"image": [
"22",
0
],
"upscale_model": [
"24",
0
]
}
},
"27": {
"_meta": {
"title": "Image Resize by Factor"
},
"class_type": "JWImageResizeByFactor",
"inputs": {
"factor": 0.5,
"image": [
"30",
0
],
"interpolation_mode": "bicubic"
}
},
"30": {
"_meta": {
"title": "ImageBlur"
},
"class_type": "ImageBlur",
"inputs": {
"blur_radius": 3,
"image": [
"26",
0
],
"sigma": 1.5
}
},
"4": {
"_meta": {
"title": "Load Checkpoint"
},
"class_type": "CheckpointLoaderSimple",
"inputs": {
"ckpt_name": "Other/dreamshaperXL_v21TurboDPMSDE.safetensors"
}
},
"6": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"4",
1
],
"text": "API_PPrompt"
}
},
"7": {
"_meta": {
"title": "CLIP Text Encode (Prompt)"
},
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"4",
1
],
"text": "API_NPrompt"
}
},
"8": {
"_meta": {
"title": "VAE Decode"
},
"class_type": "VAEDecode",
"inputs": {
"samples": [
"13",
0
],
"vae": [
"4",
2
]
}
},
"9": {
"_meta": {
"title": "Save Image"
},
"class_type": "SaveImage",
"inputs": {
"filename_prefix": "API_",
"images": [
"27",
0
]
}
}
}

View file

@ -0,0 +1,281 @@
{
"4": {
"inputs": {
"ckpt_name": "Other/dreamshaperXL_v21TurboDPMSDE.safetensors"
},
"class_type": "CheckpointLoaderSimple",
"_meta": {
"title": "Load Checkpoint"
}
},
"6": {
"inputs": {
"text": "API_PPrompt",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"7": {
"inputs": {
"text": "API_NPrompt",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"8": {
"inputs": {
"samples": [
"13",
0
],
"vae": [
"4",
2
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"9": {
"inputs": {
"filename_prefix": "API_",
"images": [
"27",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
},
"11": {
"inputs": {
"batch_size": 1,
"width": 1023,
"height": 1024,
"resampling": "nearest-exact",
"X": 0,
"Y": 0,
"Z": 0,
"evolution": 0,
"frame": 0,
"scale": 5,
"octaves": 8,
"persistence": 1.5,
"lacunarity": 2,
"exponent": 4,
"brightness": 0,
"contrast": 0,
"clamp_min": 0,
"clamp_max": 1,
"seed": 490162938389882,
"device": "cpu",
"optional_vae": [
"4",
2
]
},
"class_type": "Perlin Power Fractal Latent (PPF Noise)",
"_meta": {
"title": "Perlin Power Fractal Noise 🦚"
}
},
"13": {
"inputs": {
"seed": 697312143874418,
"steps": 8,
"cfg": 1.6,
"sampler_name": "dpmpp_2m_sde",
"scheduler": "karras",
"start_at_step": 0,
"end_at_step": 10000,
"enable_denoise": "false",
"denoise": 1,
"add_noise": "enable",
"return_with_leftover_noise": "disable",
"noise_type": "brownian_fractal",
"noise_blending": "cuberp",
"noise_mode": "additive",
"scale": 1,
"alpha_exponent": 1,
"modulator": 1,
"sigma_tolerance": 0.5,
"boost_leading_sigma": "false",
"guide_use_noise": "true",
"model": [
"4",
0
],
"positive": [
"20",
0
],
"negative": [
"7",
0
],
"latent_image": [
"11",
0
],
"ppf_settings": [
"14",
0
],
"ch_settings": [
"15",
0
]
},
"class_type": "Power KSampler Advanced (PPF Noise)",
"_meta": {
"title": "Power KSampler Advanced 🦚"
}
},
"14": {
"inputs": {
"X": 0,
"Y": 0,
"Z": 0,
"evolution": 0,
"frame": 0,
"scale": 5,
"octaves": 8,
"persistence": 1.5,
"lacunarity": 2,
"exponent": 4,
"brightness": 0,
"contrast": 0
},
"class_type": "Perlin Power Fractal Settings (PPF Noise)",
"_meta": {
"title": "Perlin Power Fractal Settings 🦚"
}
},
"15": {
"inputs": {
"frequency": 320,
"octaves": 12,
"persistence": 1.5,
"num_colors": 16,
"color_tolerance": 0.05,
"angle_degrees": 45,
"brightness": 0,
"contrast": 0,
"blur": 2.5
},
"class_type": "Cross-Hatch Power Fractal Settings (PPF Noise)",
"_meta": {
"title": "Cross-Hatch Power Fractal Settings 🦚"
}
},
"20": {
"inputs": {
"conditioning_1": [
"6",
0
],
"conditioning_2": [
"21",
0
]
},
"class_type": "ConditioningCombine",
"_meta": {
"title": "Conditioning (Combine)"
}
},
"21": {
"inputs": {
"text": "API_SPrompt",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"23": {
"inputs": {
"conditioning": [
"7",
0
]
},
"class_type": "ConditioningZeroOut",
"_meta": {
"title": "ConditioningZeroOut"
}
},
"24": {
"inputs": {
"model_name": "ESRGAN_SRx4_DF2KOST_official-ff704c30.pth"
},
"class_type": "UpscaleModelLoader",
"_meta": {
"title": "Load Upscale Model"
}
},
"26": {
"inputs": {
"upscale_model": [
"24",
0
],
"image": [
"8",
0
]
},
"class_type": "ImageUpscaleWithModel",
"_meta": {
"title": "Upscale Image (using Model)"
}
},
"27": {
"inputs": {
"factor": 0.5,
"interpolation_mode": "bicubic",
"image": [
"30",
0
]
},
"class_type": "JWImageResizeByFactor",
"_meta": {
"title": "Image Resize by Factor"
}
},
"30": {
"inputs": {
"blur_radius": 3,
"sigma": 1.5,
"image": [
"26",
0
]
},
"class_type": "ImageBlur",
"_meta": {
"title": "ImageBlur"
}
}
}

Binary file not shown.

View file

@ -0,0 +1,2 @@
#!/bin/bash
# Run the updateCal.scpt AppleScript through osascript.
# NOTE(review): the script path is absolute and specific to one user's machine.
osascript /Users/sij/workshop/sijapi/helpers/updateCal.scpt

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,195 @@
import asyncio
import json
import logging
import os, io
from datetime import datetime
from pathlib import Path

import aiohttp
import httpx
from fastapi import FastAPI, Request, BackgroundTasks, HTTPException, status
from fastapi.responses import JSONResponse
from PyPDF2 import PdfReader
# FastAPI application that receives CourtListener webhooks.
hook = FastAPI()
# /Users/sij/Library/CloudStorage/OneDrive-WELC/Documents - WELC-Docket
SYNC_FOLDER = Path(__file__).resolve().parent.parent
HOME_FOLDER = Path.home()
DOCKETS_FOLDER = HOME_FOLDER / "Dockets"
SEARCH_FOLDER = HOME_FOLDER / "Watched Cases"
SCRIPTS_FOLDER = SYNC_FOLDER / ".scripts"
REQUESTS_FOLDER = HOME_FOLDER / "sync" / "requests"
COURTLISTENER_BASE_URL = "https://www.courtlistener.com"
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
# The COURTLISTENER_API_KEY environment variable takes precedence.
# NOTE(review): the literal fallback is a credential committed to version
# control; it is kept only for backward compatibility and should be rotated
# and removed.
COURTLISTENER_API_KEY = os.environ.get(
    "COURTLISTENER_API_KEY",
    "efb5fe00f3c6c88d65a32541260945befdf53a7e",
)
# Maps a CourtListener docket id (as a string) to {"code": ..., "shortname": ...}.
with open(SCRIPTS_FOLDER / 'caseTable.json', 'r') as file:
    CASE_TABLE = json.load(file)
@hook.get("/health")
async def health():
    """Liveness probe: always answers with a fixed "ok" status document."""
    body = {"status": "ok"}
    return body
@hook.post("/cl/docket")
async def respond(request: Request, background_tasks: BackgroundTasks):
    """Receive a CourtListener docket-alert webhook.

    The request body's ``payload`` object is archived as JSON under
    ``REQUESTS_FOLDER`` and every entry of ``payload["results"]`` is queued
    for background processing by ``process_docket``.

    Returns:
        A 200 JSON response ``{"message": "Received"}``.

    Raises:
        KeyError: if the body lacks ``payload`` or ``payload["results"]``.
    """
    # Starlette leaves request.client as None when the peer address is unknown.
    client_ip = request.client.host if request.client else "unknown"
    logging.info(f"Received request from IP: {client_ip}")
    data = await request.json()
    payload = data['payload']
    results = payload['results']
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    # The archive folder is not created anywhere else; without this the write
    # below fails on a fresh machine and the webhook is lost.
    REQUESTS_FOLDER.mkdir(parents=True, exist_ok=True)
    payload_file = REQUESTS_FOLDER / f"{timestamp}-{client_ip}_docket.json"
    with open(payload_file, 'w') as file:
        json.dump(payload, file, indent=2)
    for result in results:
        background_tasks.add_task(process_docket, result)
    return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
async def process_docket(result):
    """Background-task entry point for one docket-alert result.

    ``process_docket_result`` opens its own HTTP session and never reads the
    ``session`` argument it is given, so no client is created here just to be
    thrown away.
    """
    await process_docket_result(result, None)
async def process_docket_result(result, session):
    """Download every RECAP document attached to one docket-alert result.

    Args:
        result: one entry of the webhook's ``payload["results"]`` list.
        session: forwarded unchanged to ``download_file``; this function
            performs its own CourtListener lookup with a private session.

    Files are written to
    ``DOCKETS_FOLDER/<case shortname>/Docket/<code>_<doc#>_<date>_<description>.pdf``.
    """
    docket = str(result.get('docket'))
    case_code, case_shortname = get_case_details(docket)
    date_filed = result.get('date_filed', 'No Date Filed')
    try:
        date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d')
    except (TypeError, ValueError):
        # TypeError covers an explicit null date in the payload.
        date_filed_formatted = 'NoDateFiled'

    # Fetching court docket information from the API
    url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
    headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
    court_docket = 'NoCourtDocket'
    case_name = 'NoCaseName'
    try:
        # A separate name keeps the caller-supplied ``session`` from being rebound.
        async with aiohttp.ClientSession() as api_session:
            async with api_session.get(url, headers=headers) as response:
                matches = []
                if response.status == 200:
                    logging.info(f"Fetching CourtListener docket information for {docket}...")
                    data = await response.json()
                    matches = data.get('results') or []
                if matches:
                    docket_core = matches[0].get('docket_number_core') or ''
                    if docket_core:
                        # Formatting the docket number
                        court_docket = f"{docket_core[:2]}-cv-{docket_core[2:]}"
                    case_name = matches[0].get('case_name') or case_name
                    logging.info(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
                else:
                    logging.info("Failed to fetch data from CourtListener API.")
    except Exception as e:
        # The lookup only feeds the log lines above, so it must not stop the downloads.
        logging.error(f"Error fetching CourtListener docket information: {str(e)}")

    for document in result.get('recap_documents', []):
        filepath_ia = document.get('filepath_ia')
        filepath_local = document.get('filepath_local')
        if filepath_ia:
            file_url = filepath_ia
            logging.info(f"Found IA file at {file_url}.")
        elif filepath_local:
            file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
            logging.info(f"Found local file at {file_url}.")
        else:
            logging.info(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
            continue

        document_number = document.get('document_number', 'NoDocumentNumber')
        description = document.get('description', 'NoDescription')
        if description is None:
            # The key can be present with a null value; .replace would fail on it.
            description = 'NoDescription'
        description = description.replace(" ", "_").replace("/", "_")
        description = description[:50]  # Truncate description
        file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
        target_path = Path(DOCKETS_FOLDER) / case_shortname / "Docket" / file_name
        target_path.parent.mkdir(parents=True, exist_ok=True)
        await download_file(file_url, target_path, session)
        logging.info(f"Downloaded {file_name} to {target_path}")
def get_case_details(docket):
    """Look up the case code and short name for a docket id.

    The id is stringified before the ``CASE_TABLE`` lookup. Unknown dockets
    yield ``("000", "UNKNOWN")``; a table entry missing either key yields
    ``None`` in that position.
    """
    fallback = {"code": "000", "shortname": "UNKNOWN"}
    entry = CASE_TABLE.get(str(docket), fallback)
    return entry.get("code"), entry.get("shortname")
async def download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
    """Download ``url`` to ``path``, but only if the response is a readable PDF.

    Args:
        url: address of the document.
        path: destination file; parent directories are created as needed.
        session: accepted for interface compatibility but not used; every
            call opens its own ``aiohttp.ClientSession``.

    Nothing is written, and the reason is logged, when the server answers
    403, the media type is not ``application/pdf``, PyPDF2 cannot parse the
    body, or any other error occurs. The function never raises.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
    }
    # Named ``http`` so the caller-supplied ``session`` argument is not rebound.
    async with aiohttp.ClientSession() as http:
        logging.info(f"Attempting to download {url} to {path}.")
        try:
            async with http.get(url, headers=headers, allow_redirects=True) as response:
                if response.status == 403:
                    logging.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
                    return
                response.raise_for_status()

                # Compare the media type only: servers may append parameters
                # ("application/pdf; charset=binary") or vary the letter case.
                content_type = response.headers.get('Content-Type')
                media_type = (content_type or '').split(';', 1)[0].strip().lower()
                if media_type != 'application/pdf':
                    logging.error(f"Invalid content type: {content_type}. Skipping download.")
                    return

                # Buffer in memory so an invalid file never reaches the disk.
                buffer = io.BytesIO()
                async for chunk in response.content.iter_chunked(1024):
                    buffer.write(chunk)
                buffer.seek(0)

                # Validate the downloaded PDF content
                try:
                    PdfReader(buffer)
                except Exception as e:
                    logging.error(f"Invalid PDF content: {str(e)}. Skipping download.")
                    return

                path.parent.mkdir(parents=True, exist_ok=True)
                with path.open('wb') as file:
                    file.write(buffer.getvalue())
        except Exception as e:
            logging.error(f"Error downloading file: {str(e)}")
@hook.post("/cl/search")
async def respond_search(request: Request, background_tasks: BackgroundTasks):
    """Receive a CourtListener search-alert webhook.

    The request body's ``payload`` object is archived as JSON under
    ``REQUESTS_FOLDER`` and every entry of ``payload["results"]`` is queued
    for background processing by ``process_search_result``.

    Returns:
        A 200 JSON response ``{"message": "Received"}``.

    Raises:
        KeyError: if the body lacks ``payload`` or ``payload["results"]``.
    """
    # Starlette leaves request.client as None when the peer address is unknown.
    client_ip = request.client.host if request.client else "unknown"
    logging.info(f"Received request from IP: {client_ip}")
    data = await request.json()
    payload = data['payload']
    results = payload['results']

    # Save the payload data
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    # The archive folder is not created anywhere else; without this the write
    # below fails on a fresh machine and the webhook is lost.
    REQUESTS_FOLDER.mkdir(parents=True, exist_ok=True)
    payload_file = REQUESTS_FOLDER / f"{timestamp}-{client_ip}_search.json"
    with open(payload_file, 'w') as file:
        json.dump(payload, file, indent=2)

    for result in results:
        background_tasks.add_task(process_search_result, result)
    return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
async def process_search_result(result):
    """Download the document referenced by one search-alert result.

    The file is stored at
    ``SEARCH_FOLDER/<court_id>/<caseNameShort or caseName>/<last URL segment>``.
    Results without a usable ``download_url`` are logged and skipped; a
    missing court id or case name falls back to a placeholder folder name
    instead of raising.
    """
    download_url = result.get('download_url')
    court_id = result.get('court_id')
    case_name_short = result.get('caseNameShort')
    case_name = result.get('caseName')
    logging.info(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")

    file_name = download_url.split('/')[-1] if download_url else ''
    if not file_name:
        # No URL, or a URL ending in "/", gives nothing to name the file after.
        logging.error(f"No usable download URL for case {case_name} ({court_id}). Skipping download.")
        return

    court_folder = court_id or 'NoCourtId'
    case_folder = case_name_short or case_name or 'NoCaseName'
    target_path = Path(SEARCH_FOLDER) / court_folder / case_folder / file_name
    target_path.parent.mkdir(parents=True, exist_ok=True)
    # download_file opens its own HTTP session, so none is created here.
    await download_file(download_url, target_path)
    logging.info(f"Downloaded {file_name} to {target_path}")

View file

@ -0,0 +1,32 @@
import json
import requests
# Load the caseTable.json file (its keys are CourtListener docket IDs)
with open('caseTable.json', 'r') as file:
    case_table = json.load(file)

# Set the base URL and authorization token
base_url = "https://www.courtlistener.com/api/rest/v3/docket-alerts/"
# NOTE(review): credential committed to version control; rotate it and load
# it from the environment instead.
auth_token = "a90d3f2de489aa4138a32133ca8bfec9d85fecfa"
headers = {'Authorization': f'Token {auth_token}'}

# Iterate through each key (docket ID) in the case table
for docket_id in case_table:
    data = {'docket': docket_id}
    try:
        # The timeout keeps one stalled request from hanging the whole run;
        # a timeout is reported through the RequestException handler below.
        response = requests.post(base_url, data=data, headers=headers, timeout=30)

        # Creating a resource is normally answered with 201 Created, so both
        # 200 and 201 count as success.
        if response.status_code in (200, 201):
            print(f"Successfully created docket alert for docket ID: {docket_id}")
        else:
            print(f"Failed to create docket alert for docket ID: {docket_id}")
            print(f"Status code: {response.status_code}")
            print(f"Response content: {response.content}")
    except requests.exceptions.RequestException as e:
        print(f"Error occurred while creating docket alert for docket ID: {docket_id}")
        print(f"Error message: {str(e)}")

View file

@ -0,0 +1,146 @@
#!/bin/bash
# Rebuilds the weather tables of weatherlocate.db: export to CSV, drop and
# recreate the tables, then re-import the CSV data.
#
# Stop at the first failing command: the later steps drop tables, so they must
# never run after an incomplete backup.
set -euo pipefail

DB_NAME="weatherlocate.db"

# Step 1: Backup existing data
# -bail makes sqlite3 stop at the first failing statement so the failure is
# reflected in its exit status.
echo "Backing up existing data..."
sqlite3 -bail "$DB_NAME" <<EOF
.headers on
.mode csv
.output hourly_weather_backup.csv
SELECT * FROM HourlyWeather;
.output daily_weather_backup.csv
SELECT * FROM DailyWeather;
.output hours_backup.csv
SELECT * FROM Hours;
.output days_backup.csv
SELECT * FROM Days;
EOF
# Step 2: Drop and recreate tables
# Everything runs in one transaction with -bail, so a failing statement rolls
# the whole schema swap back instead of leaving tables dropped but not
# recreated; the script then stops before the import step.
echo "Dropping and recreating tables..."
sqlite3 -bail "$DB_NAME" <<EOF || { echo "Schema rebuild failed; aborting before import." >&2; exit 1; }
BEGIN;

DROP TABLE IF EXISTS HourlyWeather;
DROP TABLE IF EXISTS DailyWeather;
DROP TABLE IF EXISTS Hours;
DROP TABLE IF EXISTS Days;

CREATE TABLE HourlyWeather (
    id INTEGER PRIMARY KEY,
    datetime TEXT NOT NULL,
    temp REAL,
    feelslike REAL,
    humidity REAL,
    dew REAL,
    precip REAL,
    precipprob REAL,
    snow REAL,
    snowdepth REAL,
    windgust REAL,
    windspeed REAL,
    winddir REAL,
    pressure REAL,
    cloudcover REAL,
    visibility REAL,
    solarradiation REAL,
    solarenergy REAL,
    uvindex REAL,
    severerisk REAL,
    conditions TEXT,
    icon TEXT,
    last_updated TEXT DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE DailyWeather (
    id INTEGER PRIMARY KEY,
    sunrise_time TEXT,
    sunset_time TEXT,
    description TEXT,
    tempmax REAL,
    tempmin REAL,
    uvindex REAL,
    winddir REAL,
    windspeedmean REAL,
    windspeed REAL,
    icon TEXT,
    last_updated TEXT DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE Hours (
    id INTEGER PRIMARY KEY,
    day_id INTEGER,
    hour INTEGER,
    hourly_weather_id INTEGER,
    FOREIGN KEY (day_id) REFERENCES Days(id),
    FOREIGN KEY (hourly_weather_id) REFERENCES HourlyWeather(id)
);

CREATE TABLE Days (
    id INTEGER PRIMARY KEY,
    date TEXT NOT NULL,
    daily_weather_id INTEGER,
    FOREIGN KEY (daily_weather_id) REFERENCES DailyWeather(id)
);

COMMIT;
EOF
# Step 3: Import data from backup files
# Rows are re-inserted with the ids found in the CSV exports so that the
# references stored in Hours (day_id, hourly_weather_id) and Days
# (daily_weather_id) still point at the same rows after the rebuild.
echo "Importing data from backup files..."
python3 <<EOF
import sqlite3
import csv
from datetime import datetime

TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

HOURLY_COLUMNS = [
    'datetime', 'temp', 'feelslike', 'humidity', 'dew', 'precip',
    'precipprob', 'snow', 'snowdepth', 'windgust', 'windspeed', 'winddir',
    'pressure', 'cloudcover', 'visibility', 'solarradiation', 'solarenergy',
    'uvindex', 'severerisk', 'conditions', 'icon', 'last_updated',
]
DAILY_COLUMNS = [
    'sunrise_time', 'sunset_time', 'description', 'tempmax', 'tempmin',
    'uvindex', 'winddir', 'windspeedmean', 'windspeed', 'icon', 'last_updated',
]
HOURS_COLUMNS = ['day_id', 'hour', 'hourly_weather_id']
DAYS_COLUMNS = ['date', 'daily_weather_id']


def load_table(cursor, csv_path, table, columns):
    """Insert every row of csv_path into table, keeping the exported id."""
    placeholders = ', '.join('?' for _ in range(len(columns) + 1))
    sql = f"INSERT INTO {table} (id, {', '.join(columns)}) VALUES ({placeholders})"
    with open(csv_path, 'r', newline='') as file:
        for row in csv.DictReader(file):
            # A missing or empty id becomes NULL, which lets SQLite assign one.
            raw_id = row.get('id')
            values = [int(raw_id) if raw_id else None]
            for column in columns:
                value = row[column]
                if column == 'last_updated':
                    # Round-trip through strptime so malformed timestamps are rejected.
                    value = datetime.strptime(value, TIMESTAMP_FORMAT).strftime(TIMESTAMP_FORMAT)
                values.append(value)
            cursor.execute(sql, values)


def import_data():
    """Reload all four tables; nothing is committed unless every file loads."""
    conn = sqlite3.connect('$DB_NAME')
    try:
        cursor = conn.cursor()
        load_table(cursor, 'hourly_weather_backup.csv', 'HourlyWeather', HOURLY_COLUMNS)
        load_table(cursor, 'daily_weather_backup.csv', 'DailyWeather', DAILY_COLUMNS)
        load_table(cursor, 'hours_backup.csv', 'Hours', HOURS_COLUMNS)
        load_table(cursor, 'days_backup.csv', 'Days', DAYS_COLUMNS)
        conn.commit()
    finally:
        conn.close()


import_data()
EOF

echo "Database rebuild complete."

View file

@ -0,0 +1,123 @@
import sqlite3
from pathlib import Path
# Get the home directory
home_dir = Path.home()
# Define the path to the database
DB = home_dir / "sync" / "sijapi" / "data" / "weatherlocate.db"
def create_database():
    """Create the weather/location tables in the SQLite file at ``DB``.

    Every statement is ``CREATE TABLE IF NOT EXISTS``, so running this against
    an existing database leaves existing tables untouched. The parent
    directory of ``DB`` is created when missing, all tables are created in a
    single transaction, and the connection is always closed.
    """
    schema = (
        # Create the Locations table
        '''
        CREATE TABLE IF NOT EXISTS Locations (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            street TEXT,
            city TEXT,
            state TEXT,
            country TEXT,
            latitude REAL,
            longitude REAL,
            zip TEXT,
            elevation REAL,
            last_updated DATETIME
        );
        ''',
        # Create the Days table with a direct reference to DailyWeather
        '''
        CREATE TABLE IF NOT EXISTS Days (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            date DATE UNIQUE NOT NULL,
            daily_weather_id INTEGER,
            general_location_id INTEGER,
            FOREIGN KEY(daily_weather_id) REFERENCES DailyWeather(id),
            FOREIGN KEY(general_location_id) REFERENCES Locations(id)
        );
        ''',
        # Create the DailyWeather table with fields adjusted for direct CSV storage of preciptype
        '''
        CREATE TABLE IF NOT EXISTS DailyWeather (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            sunrise TEXT,
            sunriseEpoch TEXT,
            sunset TEXT,
            sunsetEpoch TEXT,
            description TEXT,
            tempmax REAL,
            tempmin REAL,
            uvindex INTEGER,
            winddir REAL,
            windspeed REAL,
            icon TEXT,
            last_updated DATETIME,
            datetime TEXT,
            datetimeEpoch INTEGER,
            temp REAL,
            feelslikemax REAL,
            feelslikemin REAL,
            feelslike REAL,
            dew REAL,
            humidity REAL,
            precip REAL,
            precipprob REAL,
            precipcover REAL,
            preciptype TEXT,
            snow REAL,
            snowdepth REAL,
            windgust REAL,
            pressure REAL,
            cloudcover REAL,
            visibility REAL,
            solarradiation REAL,
            solarenergy REAL,
            severerisk REAL,
            moonphase REAL,
            conditions TEXT,
            stations TEXT,
            source TEXT
        );
        ''',
        # Create the HourlyWeather table
        '''
        CREATE TABLE IF NOT EXISTS HourlyWeather (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            day_id INTEGER,
            datetime TEXT,
            datetimeEpoch INTEGER,
            temp REAL,
            feelslike REAL,
            humidity REAL,
            dew REAL,
            precip REAL,
            precipprob REAL,
            snow REAL,
            snowdepth REAL,
            preciptype TEXT,
            windgust REAL,
            windspeed REAL,
            winddir REAL,
            pressure REAL,
            cloudcover REAL,
            visibility REAL,
            solarradiation REAL,
            solarenergy REAL,
            uvindex REAL,
            severerisk REAL,
            conditions TEXT,
            icon TEXT,
            stations TEXT,
            source TEXT,
            FOREIGN KEY(day_id) REFERENCES Days(id)
        );
        ''',
    )

    # sqlite3 cannot create missing directories for the database file.
    DB.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB)
    try:
        # "with conn" commits on success and rolls back on error, but it does
        # not close the connection; the finally clause does that.
        with conn:
            cursor = conn.cursor()
            for statement in schema:
                cursor.execute(statement)
    finally:
        conn.close()


if __name__ == "__main__":
    create_database()

View file

@ -0,0 +1,89 @@
import osmium
import psycopg2
import json
from sijapi import DB_USER, DB_PASS, DB_HOST, DB, DATA_DIR
OSM_DATA_PATH = DATA_DIR / "north-america-latest.osm.pbf"
class OSMHandler(osmium.SimpleHandler):
    """Osmium handler that copies nodes, ways and relations into PostgreSQL.

    Each callback inserts one row into the ``nodes``, ``ways`` or
    ``relations`` table through the connection given at construction time
    and commits immediately. Tags, way node coordinates and relation members
    are stored as JSON text.
    """

    def __init__(self, conn):
        """Remember ``conn``, the open database connection used for all inserts."""
        super().__init__()
        self.conn = conn

    def _insert(self, sql, params):
        """Run one INSERT and commit, releasing the cursor afterwards."""
        # The context manager closes the cursor, so one is not leaked per element.
        with self.conn.cursor() as cur:
            cur.execute(sql, params)
        self.conn.commit()

    def node(self, n):
        """Store a node as a point (SRID 4326) together with its tags."""
        tags = {tag.k: tag.v for tag in n.tags}
        self._insert(
            """
            INSERT INTO nodes (id, location, tags)
            VALUES (%s, ST_SetSRID(ST_MAKEPOINT(%s, %s),4326), %s)
            """,
            (n.id, n.location.lon, n.location.lat, json.dumps(tags)),
        )

    def way(self, w):
        """Store a way as a JSON list of (lon, lat) pairs together with its tags."""
        # Reading lon/lat from way nodes requires osmium to have been asked to
        # resolve node locations when the file is applied.
        nodes = [(node.lon, node.lat) for node in w.nodes]
        tags = {tag.k: tag.v for tag in w.tags}
        self._insert(
            """
            INSERT INTO ways (id, nodes, tags)
            VALUES (%s, %s, %s)
            """,
            (w.id, json.dumps(nodes), json.dumps(tags)),
        )

    def relation(self, r):
        """Store a relation with its members (type, ref, role) and tags."""
        members = [{"type": m.type, "ref": m.ref, "role": m.role} for m in r.members]
        tags = {tag.k: tag.v for tag in r.tags}
        self._insert(
            """
            INSERT INTO relations (id, members, tags)
            VALUES (%s, %s, %s)
            """,
            (r.id, json.dumps(members), json.dumps(tags)),
        )
def main():
    """Recreate the nodes/ways/relations tables and load the OSM extract.

    Existing ``nodes``, ``ways`` and ``relations`` tables are dropped first.
    The connection is closed even when the import fails part-way.
    """
    conn = psycopg2.connect(user=DB_USER, password=DB_PASS, dbname=DB, host=DB_HOST)
    try:
        cur = conn.cursor()

        # Drop existing tables if they exist
        cur.execute("DROP TABLE IF EXISTS nodes")
        cur.execute("DROP TABLE IF EXISTS ways")
        cur.execute("DROP TABLE IF EXISTS relations")

        # Create tables for nodes, ways, and relations
        cur.execute("""
            CREATE TABLE nodes (
                id bigint PRIMARY KEY,
                location geography(POINT, 4326),
                tags jsonb
            )
        """)
        cur.execute("""
            CREATE TABLE ways (
                id bigint PRIMARY KEY,
                nodes jsonb,
                tags jsonb
            )
        """)
        cur.execute("""
            CREATE TABLE relations (
                id bigint PRIMARY KEY,
                members jsonb,
                tags jsonb
            )
        """)
        conn.commit()
        cur.close()

        handler = OSMHandler(conn)
        # locations=True makes osmium attach coordinates to the node references
        # of each way; the handler reads lon/lat from them, which are invalid
        # without it.
        handler.apply_file(str(OSM_DATA_PATH), locations=True)
    finally:
        conn.close()


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,76 @@
from vectordb import Memory
# Build the vector store: "embedding.pt" is passed as its memory file, text is
# chunked with a sliding window (window_size 128, overlap 16; the unit is not
# visible here) and embedded with the 'TaylorAI/bge-micro-v2' model.
memory = Memory(memory_file="embedding.pt",
chunking_strategy={"mode": "sliding_window", "window_size": 128, "overlap": 16}, embeddings='TaylorAI/bge-micro-v2'
)
texts = [
"""
Machine learning is a method of data analysis that automates analytical model building.
It is a branch of artificial intelligence based on the idea that systems can learn from data,
identify patterns and make decisions with minimal human intervention.
Machine learning algorithms are trained on data sets that contain examples of the desired output. For example, a machine learning algorithm that is used to classify images might be trained on a data set that contains images of cats and dogs.
Once an algorithm is trained, it can be used to make predictions on new data. For example, the machine learning algorithm that is used to classify images could be used to predict whether a new image contains a cat or a dog.
Machine learning algorithms can be used to solve a wide variety of problems. Some common applications of machine learning include:
Classification: Categorizing data into different groups. For example, a machine learning algorithm could be used to classify emails as spam or not spam.
Regression: Predicting a continuous value. For example, a machine learning algorithm could be used to predict the price of a house.
Clustering: Finding groups of similar data points. For example, a machine learning algorithm could be used to find groups of customers with similar buying habits.
Anomaly detection: Finding data points that are different from the rest of the data. For example, a machine learning algorithm could be used to find fraudulent credit card transactions.
Machine learning is a powerful tool that can be used to solve a wide variety of problems. As the amount of data available continues to grow, machine learning is likely to become even more important in the future.
""",
"""
Artificial intelligence (AI) is the simulation of human intelligence in machines
that are programmed to think like humans and mimic their actions.
The term may also be applied to any machine that exhibits traits associated with
a human mind such as learning and problem-solving.
AI research has been highly successful in developing effective techniques for solving a wide range of problems, from game playing to medical diagnosis.
However, there is still a long way to go before AI can truly match the intelligence of humans. One of the main challenges is that human intelligence is incredibly complex and poorly understood.
Despite the challenges, AI is a rapidly growing field with the potential to revolutionize many aspects of our lives. Some of the potential benefits of AI include:
Increased productivity: AI can be used to automate tasks that are currently performed by humans, freeing up our time for more creative and fulfilling activities.
Improved decision-making: AI can be used to make more informed decisions, based on a wider range of data than humans can typically access.
Enhanced creativity: AI can be used to generate new ideas and solutions, beyond what humans can imagine on their own.
Of course, there are also potential risks associated with AI, such as:
Job displacement: As AI becomes more capable, it is possible that it will displace some human workers.
Weaponization: AI could be used to develop new weapons that are more powerful and destructive than anything we have today.
Loss of control: If AI becomes too powerful, we may lose control over it, with potentially disastrous consequences.
It is important to weigh the potential benefits and risks of AI carefully as we continue to develop this technology. With careful planning and oversight, AI has the potential to make the world a better place. However, if we are not careful, it could also lead to serious problems.
""",
]
metadata_list = [
{
"title": "Introduction to Machine Learning",
"url": "https://example.com/introduction-to-machine-learning",
},
{
"title": "Introduction to Artificial Intelligence",
"url": "https://example.com/introduction-to-artificial-intelligence",
},
]
# Store both documents; metadata_list is presumably paired with texts by position.
memory.save(texts, metadata_list)
# Ask for up to three matches for a question that touches both documents.
query = "What is the relationship between AI and machine learning?"
results = memory.search(query, top_n=3, unique=True)
print(results)
# two results will be returned as unique param is set to True

View file

@ -0,0 +1,15 @@
from vectordb import Memory

# A Memory instance is the container for everything that should be searchable.
memory = Memory()

# The texts to store and, optionally, one metadata dict per text.
# (Per the original note, long texts are chunked automatically.)
documents = ["apples are green", "oranges are orange"]
sources = [{"url": "https://apples.com"}, {"url": "https://oranges.com"}]
memory.save(documents, sources)

# Ask for the single most relevant entry; matching is embedding-based.
query = "green"
results = memory.search(query, top_n=1)
print(results)

View file

@ -0,0 +1,46 @@
# Updates watch complications for Secure ShellFish
#
# This command sends encrypted data through push notifications such
# that it doesn't need to run from a Secure ShellFish terminal.
#
# NOTE(review): from the heredoc terminator onward (EOF, return, fi and the
# encryption/push logic) every line below is commented out, so as written the
# here-document and the `if` are never closed. This looks like a deliberately
# disabled snippet rather than a runnable script -- confirm before relying on it.
# NOTE(review): the commented-out section embeds what appear to be a literal
# key, user id and IV; consider keeping them out of version control.
if [[ $# -eq 0 ]]; then
cat <<EOF
# Usage: widget [target] <data> ...
# Update complication on device from which this function was installed with a number of content parameters that can be string, progress, icon, target or color.
# Each argument type is derived from input.
# Progress has the form: 50% or 110/220
# Icon must match valid SF Symbol name such as globe or terminal.fill
# Colors must be hex colours such as #000 #ff00ff where the color is used for later content and 'foreground' switches back to default colour
# Target is used to send different content to different complications after configuring the complications with different target identifiers which requires the pro unlock. The target parameter is never assumed unless --target is used and is effective until next --target parameter allowing updates of several complications with a single command
# You can configure complications to only show content for a given target.
# String is the fallback type if nothing else matches, but content type can be forced for next parameter with --progress, --icon, --color, --text or --target with
# something like:
widget --text "50/100"
# You can update several complications at once by using --target to send all parameters until the next --target to a particular complication. Updating several complications at once allows more total updates per day.
# EOF
# return 0
# fi
# local key=d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b
# local user=WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm
# local iv=ab5bbeb426015da7eedcee8bee3dffb7
# local plain=$(
# echo Secure ShellFish Widget 2.0
# for var in "$@"
# do
# echo -ne "$var" | base64
# done)
# local base64=$(echo "$plain" | openssl enc -aes-256-cbc -base64 -K $key -iv $iv)
# curl -sS -X POST -H "Content-Type: text/plain" --data "$base64" "https://secureshellfish.app/push/?user=$user"

View file

@ -0,0 +1,17 @@
#!/bin/bash
# Post a "joyful" mood banner to the local note API for every day of July 2024.

# Iterate over the zero-padded days 01 through 31
for i in $(seq -w 01 31); do
    # Construct the date string
    DATE="2024-07-${i}"

    # Print the date being processed (optional)
    echo "Processing date: $DATE"

    # Run the curl command. It runs in the foreground, so the loop only moves
    # on once the request has finished; no explicit `wait` is required.
    curl -X POST -H "Content-Type: application/json" -d '{"mood": "joyful"}' "http://localhost:4444/note/banner?dt=$DATE"
done

File diff suppressed because it is too large Load diff

88
sijapi/logs.py Normal file
View file

@ -0,0 +1,88 @@
import os
import sys
import logging
from logging.handlers import RotatingFileHandler
from colorama import Fore, Back, Style, init as colorama_init
import traceback
# Force colorama to initialize for the current platform.
# This runs at import time, before any StreamHandler is created on sys.stdout.
# NOTE(review): convert=True is forced on every platform; on non-Windows
# systems this may make colorama drop ANSI sequences instead of passing them
# through -- confirm that colors actually render where this runs.
colorama_init(autoreset=True, strip=False, convert=True)
class ColorFormatter(logging.Formatter):
    """Formatter that wraps every rendered record in a level-specific color."""

    # Color prefix per logging level; levels missing here get no prefix.
    COLOR_MAP = {
        logging.DEBUG: Fore.CYAN,
        logging.INFO: Fore.GREEN,
        logging.WARNING: Fore.YELLOW,
        logging.ERROR: Fore.RED,
        logging.CRITICAL: Fore.MAGENTA + Back.WHITE,
    }

    def format(self, record):
        """Return the formatted record, color-prefixed and followed by a style reset."""
        prefix = self.COLOR_MAP.get(record.levelno, '')
        rendered = super().format(record)
        return prefix + rendered + Style.RESET_ALL
class Logger:
    """Thin wrapper that configures a named logger with file and console handlers.

    The file handler records DEBUG and above to ``<logs_dir>/app.log`` (rotated
    at 2,000,000 bytes, keeping 10 backups). The console handler writes
    colorized output to stdout at DEBUG or INFO depending on ``args.debug``.
    """

    def __init__(self, name, logs_dir):
        """Bind to the logger called *name*; log files go into *logs_dir*."""
        self.logs_dir = logs_dir
        self.logger = logging.getLogger(name)
        self.logger.setLevel(logging.DEBUG)
        # Handlers attached by setup_from_args, remembered so that a repeated
        # setup replaces them instead of stacking duplicates.
        self._handlers = []

    def setup_from_args(self, args):
        """Attach the file and console handlers to the wrapped logger.

        Args:
            args: Object exposing a boolean ``debug`` attribute (for example an
                argparse namespace). When true, the console shows DEBUG output.
        """
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(self.logs_dir, exist_ok=True)

        # Remove handlers left by an earlier call so each record is emitted once.
        for stale in getattr(self, "_handlers", []):
            self.logger.removeHandler(stale)
            stale.close()
        self._handlers = []

        # File handler
        handler_path = os.path.join(self.logs_dir, 'app.log')
        file_handler = RotatingFileHandler(handler_path, maxBytes=2000000, backupCount=10)
        file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        file_handler.setLevel(logging.DEBUG)

        # Console handler
        console_handler = logging.StreamHandler(sys.stdout)  # Explicitly use sys.stdout
        console_formatter = ColorFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        console_handler.setFormatter(console_formatter)

        # Set console handler level based on args
        console_handler.setLevel(logging.DEBUG if args.debug else logging.INFO)

        # Add handlers to logger
        for handler in (file_handler, console_handler):
            self.logger.addHandler(handler)
            self._handlers.append(handler)

        # Test color output (kept from the original: emits one record per level
        # on every setup so the color mapping can be checked by eye).
        self.logger.debug("Debug message (should be Cyan)")
        self.logger.info("Info message (should be Green)")
        self.logger.warning("Warning message (should be Yellow)")
        self.logger.error("Error message (should be Red)")
        self.logger.critical("Critical message (should be Magenta on White)")

    def get_logger(self):
        """Return the underlying ``logging.Logger`` instance."""
        return self.logger
# Manual smoke test: run this module directly to check handler setup and colors.
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--debug', action='store_true')
    args = cli.parse_args()

    logger = Logger("test", "logs")
    logger.setup_from_args(args)
    test_logger = logger.get_logger()

    # Print the environment variables that commonly influence colored output.
    for env_name in ('FORCE_COLOR', 'NO_COLOR', 'TERM', 'PYCHARM_HOSTED', 'PYTHONIOENCODING'):
        print(f"{env_name}:", os.environ.get(env_name))

    # Emit one record per level through the configured logger.
    test_logger.debug("This is a debug message")
    test_logger.info("This is an info message")
    test_logger.warning("This is a warning message")
    test_logger.error("This is an error message")
    test_logger.critical("This is a critical message")

165
sijapi/routers/asr.py Normal file
View file

@ -0,0 +1,165 @@
'''
Automatic Speech Recognition module relying on the `whisper_cpp` implementation of OpenAI's Whisper model.
Depends on:
LOGGER, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR
Notes:
Performs exceptionally well on Apple Silicon. Other devices will benefit from future updates to optionally use `faster_whisper`, `insanely_faster_whisper`, and/or `whisper_jax`.
'''
from fastapi import APIRouter, HTTPException, Form, UploadFile, File
from pydantic import BaseModel, Field
from typing import Optional
import tempfile
from fastapi.responses import JSONResponse, FileResponse
from pydantic import BaseModel, HttpUrl
from whisperplus.pipelines import mlx_whisper
from youtube_dl import YoutubeDL
from urllib.parse import unquote
import subprocess
import os
import uuid
from threading import Thread
import multiprocessing
import asyncio
import subprocess
import tempfile
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR, MAX_CPU_CORES
# Router on which this module's transcription endpoints are registered.
asr = APIRouter()
class TranscribeParams(BaseModel):
    # Options for a transcription request. Each field is turned into a
    # whisper.cpp command-line flag by transcribe_audio.

    # Model name; transcribe_audio falls back to 'small' for unknown names.
    model: str = Field(default="small")
    # Emit SRT output (-osrt); takes precedence over output_json.
    output_srt : Optional[bool] = Field(default=False)
    # Spoken language (-l); when unset, detect_language may add -dl instead.
    language : Optional[str] = Field(None)
    # Split on word rather than on token (-sow).
    split_on_word : Optional[bool] = Field(default=False)
    # Sampling temperature (-tp); only passed when greater than zero.
    temperature : Optional[float] = Field(default=0)
    # Temperature increment (-tpi). This is a fractional value, so it is typed
    # as float; the previous int annotation could not represent e.g. 0.2.
    temp_increment : Optional[float] = Field(None)
    # Translate to English (-tr).
    translate : Optional[bool] = Field(default=False)
    # Stereo diarization (-di).
    diarize : Optional[bool] = Field(default=False)
    # tinydiarize (-tdrz).
    tiny_diarize : Optional[bool] = Field(default=False)
    # Disable temperature fallback (-nf).
    no_fallback : Optional[bool] = Field(default=False)
    # Emit JSON output (-oj); ignored when output_srt is set.
    output_json : Optional[bool] = Field(default=False)
    # Auto-detect the language (-dl); ignored when language is given.
    detect_language : Optional[bool] = Field(default=False)
    # Value forwarded to --dtw.
    dtw : Optional[str] = Field(None)
    # Requested thread count; clamped to 1..MAX_CPU_CORES by transcribe_audio.
    threads : Optional[int] = Field(None)
from urllib.parse import unquote
import json
@asr.post("/asr")
@asr.post("/transcribe")
@asr.post("/v1/audio/transcription")
async def transcribe_endpoint(
    file: UploadFile = File(...),
    params: str = Form(...)
):
    """Transcribe an uploaded audio file.

    Args:
        file: The uploaded audio file.
        params: URL-encoded JSON object whose keys match ``TranscribeParams``.

    Returns:
        Whatever ``transcribe_audio`` returns for the upload.

    Raises:
        HTTPException: 400 when *params* is not valid JSON or cannot be turned
            into a ``TranscribeParams`` instance.
    """
    try:
        # Decode the URL-encoded string
        decoded_params = unquote(params)
        # Parse the JSON string
        parameters_dict = json.loads(decoded_params)
        # Create TranscribeParams object
        parameters = TranscribeParams(**parameters_dict)
    except json.JSONDecodeError as json_err:
        raise HTTPException(status_code=400, detail=f"Invalid JSON: {str(json_err)}") from json_err
    except Exception as err:
        raise HTTPException(status_code=400, detail=f"Error parsing parameters: {str(err)}") from err

    # delete=False so the file survives the `with`; it is removed explicitly below.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(await file.read())
        temp_file_path = temp_file.name

    try:
        return await transcribe_audio(file_path=temp_file_path, params=parameters)
    finally:
        # transcribe_audio works on its own WAV conversion, so the raw upload
        # is no longer needed and would otherwise accumulate in the temp dir.
        try:
            os.remove(temp_file_path)
        except OSError as cleanup_err:
            WARN(f"Could not remove temporary upload {temp_file_path}: {cleanup_err}")
async def transcribe_audio(file_path, params: TranscribeParams):
    """Transcribe an audio file with the whisper.cpp ``main`` binary.

    The input is first converted to WAV via ``convert_to_wav``, then a command
    line is assembled from *params* and run as a subprocess.

    Args:
        file_path: Path of the audio file to transcribe.
        params: Transcription options; model names not listed in
            ``WHISPER_CPP_MODELS`` fall back to 'small'.

    Returns:
        The stripped standard output of the whisper.cpp process.

    Raises:
        RuntimeError: If the process exits with a non-zero status.
    """
    # convert_to_wav shells out to ffmpeg synchronously; run it in the default
    # executor so the event loop is not blocked while ffmpeg works.
    loop = asyncio.get_running_loop()
    file_path = await loop.run_in_executor(None, convert_to_wav, file_path)

    model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
    model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'

    command = [str(WHISPER_CPP_DIR / 'build' / 'bin' / 'main')]
    command.extend(['-m', str(model_path)])
    # Clamp the requested thread count to the range 1..MAX_CPU_CORES.
    command.extend(['-t', str(max(1, min(params.threads or MAX_CPU_CORES, MAX_CPU_CORES)))])
    command.extend(['-np'])  # Always enable no-prints

    if params.split_on_word:
        command.append('-sow')
    # temperature is Optional: an explicit null must not reach the comparison.
    if params.temperature is not None and params.temperature > 0:
        command.extend(['-tp', str(params.temperature)])
    if params.temp_increment:
        command.extend(['-tpi', str(params.temp_increment)])
    # An explicit language wins over auto-detection.
    if params.language:
        command.extend(['-l', params.language])
    elif params.detect_language:
        command.append('-dl')
    if params.translate:
        command.append('-tr')
    if params.diarize:
        command.append('-di')
    if params.tiny_diarize:
        command.append('-tdrz')
    if params.no_fallback:
        command.append('-nf')
    # Output mode: SRT, else JSON, else plain text without timestamps.
    if params.output_srt:
        command.append('-osrt')
    elif params.output_json:
        command.append('-oj')
    else:
        command.append('-nt')
    if params.dtw:
        command.extend(['--dtw', params.dtw])

    command.extend(['-f', file_path])
    DEBUG(f"Command: {command}")

    proc = await asyncio.create_subprocess_exec(
        *command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )
    stdout, stderr = await proc.communicate()

    if proc.returncode != 0:
        raise RuntimeError(f"Error running command: {stderr.decode()}")

    result = stdout.decode().strip()
    DEBUG(f"Result: {result}")
    return result
def convert_to_wav(file_path: str):
    """Convert *file_path* to a 16 kHz mono 16-bit PCM WAV file with ffmpeg.

    The output gets a random UUID name inside ``ASR_DIR``.

    Returns:
        Path of the newly written WAV file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    wav_file_path = os.path.join(ASR_DIR, f"{uuid.uuid4()}.wav")
    ffmpeg_command = [
        "ffmpeg", "-y",
        "-i", file_path,
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        wav_file_path,
    ]
    subprocess.run(ffmpeg_command, check=True)
    return wav_file_path
def download_from_youtube(url: str):
    """Download the audio of *url* with youtube_dl and return it as a WAV path.

    The download targets a UUID-named ``.mp3`` path inside ``ASR_DIR`` and is
    then passed through ``convert_to_wav``.
    """
    target_path = os.path.join(ASR_DIR, f"{uuid.uuid4()}.mp3")
    extract_audio = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        'outtmpl': target_path,
        'postprocessors': [extract_audio],
        'nooverwrites': True,
    }
    with YoutubeDL(options) as downloader:
        downloader.download([url])
    return convert_to_wav(target_path)
def format_srt_timestamp(seconds: float):
    """Render a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The offset is rounded to whole milliseconds before being split into
    hours, minutes, seconds and milliseconds.
    """
    total_ms = round(seconds * 1000.0)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, millis = divmod(remainder, 1_000)
    return f"{hours:02}:{minutes:02}:{whole_seconds:02},{millis:03}"
def write_srt(segments: list, output_file: str):
    """Write *segments* to *output_file* in SubRip (SRT) format.

    Args:
        segments: Sequence of mappings with 'start' and 'end' (seconds) and
            'text' keys.
        output_file: Destination path; an existing file is overwritten.
    """
    # Pin UTF-8 so non-ASCII transcript text does not depend on the platform's
    # default encoding.
    with open(output_file, 'w', encoding='utf-8') as f:
        for i, segment in enumerate(segments, start=1):
            start = format_srt_timestamp(segment['start'])
            end = format_srt_timestamp(segment['end'])
            text = segment['text']
            # Cue layout: index, time range, text, blank separator line.
            f.write(f"{i}\n{start} --> {end}\n{text}\n\n")

415
sijapi/routers/calendar.py Normal file
View file

@ -0,0 +1,415 @@
'''
Calendar module using macOS Calendars and/or Microsoft 365 via its Graph API.
Depends on:
LOGGER, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
'''
from fastapi import APIRouter, Depends, HTTPException, status, Request
from fastapi.responses import RedirectResponse, JSONResponse
from fastapi.security import OAuth2PasswordBearer
import httpx
import json
import os
import time
from dateutil.parser import isoparse as parse_iso
import threading
from typing import Dict, List, Any
from datetime import datetime, timedelta
from Foundation import NSDate, NSRunLoop
import EventKit as EK
from sijapi import ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
from sijapi.utilities import localize_dt, localize_dt
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
# Router on which this module's calendar and Microsoft 365 endpoints are registered.
calendar = APIRouter()
# OAuth2 bearer scheme whose token URL is "/token".
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token")
# 12-second timeout shared by the httpx clients created with timeout=timeout.
timeout = httpx.Timeout(12)
if MS365_TOGGLE is True:
CRITICAL(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
@calendar.get("/o365/login")
async def login():
    """Redirect the caller to the Microsoft 365 authorization page.

    Raises:
        HTTPException: 500 when no scopes are configured in ``MS365_SCOPE``.
    """
    DEBUG(f"Received request to /o365/login")
    DEBUG(f"SCOPE: {MS365_SCOPE}")
    if not MS365_SCOPE:
        ERR("No scopes defined for authorization.")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="No scopes defined for authorization."
        )

    # The scopes are joined with '+' and placed into the query string verbatim.
    scope_param = '+'.join(MS365_SCOPE)
    authorization_url = (
        f"{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize"
        f"?client_id={MS365_CLIENT_ID}"
        f"&response_type=code"
        f"&redirect_uri={MS365_REDIRECT_PATH}"
        f"&scope={scope_param}"
    )
    INFO(f"Redirecting to authorization URL: {authorization_url}")
    return RedirectResponse(authorization_url)
@calendar.get("/o365/oauth_redirect")
async def oauth_redirect(code: str = None, error: str = None):
    """Handle the OAuth2 redirect and exchange the authorization code for a token.

    Args:
        code: Authorization code from the redirect query string.
        error: Error value from the redirect query string, if any.

    Returns:
        A confirmation message once the token has been handed to ``save_token``.

    Raises:
        HTTPException: 400 when the redirect carries an error or no code;
            500 when the token endpoint response has no access token.
    """
    INFO(f"Received request to /o365/oauth_redirect")
    if error:
        ERR(f"OAuth2 Error: {error}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error"
        )
    # Without a code there is nothing to exchange; fail early instead of
    # posting "code": None to the token endpoint.
    if not code:
        ERR("OAuth2 redirect received without an authorization code")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="Missing authorization code"
        )

    # The code itself is a credential, so it is deliberately not logged.
    INFO("Requesting token with authorization code")
    token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token"
    data = {
        "client_id": MS365_CLIENT_ID,
        "client_secret": MS365_SECRET,
        "code": code,
        "redirect_uri": MS365_REDIRECT_PATH,
        "grant_type": "authorization_code"
    }
    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(token_url, data=data)

    # Only the status is logged: a successful body contains the access and
    # refresh tokens and must not end up in the log files.
    DEBUG(f"Token endpoint response status code: {response.status_code}")
    result = response.json()

    if 'access_token' in result:
        await save_token(result)
        INFO("Access token obtained successfully")
        return {"message": "Access token stored successfully"}
    else:
        CRITICAL(f"Failed to obtain access token. Response: {result}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to obtain access token"
        )
@calendar.get("/o365/me")
async def read_items():
    """Return the Microsoft Graph profile (``/me``) for the stored token.

    Raises:
        HTTPException: 401 when no token is stored or Graph does not answer 200.
    """
    INFO(f"Received request to /o365/me")
    token = await load_token()
    if not token:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Access token not found",
        )

    auth_headers = {"Authorization": f"Bearer {token['access_token']}"}
    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.get("https://graph.microsoft.com/v1.0/me", headers=auth_headers)

    # Any status other than 200 is reported to the caller as an invalid token.
    if response.status_code != 200:
        ERR("Invalid or expired token")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or expired token",
            headers={"WWW-Authenticate": "Bearer"},
        )

    user = response.json()
    INFO(f"User retrieved: {user}")
    return user
async def save_token(token):
    """Persist *token* as JSON at ``MS365_TOKEN_PATH``.

    When the token carries ``expires_in``, an absolute ``expires_at`` epoch
    timestamp is added before writing. Failures are logged, not raised.

    Args:
        token: Token response dict; it is mutated in place.
    """
    # The token holds access/refresh secrets, so its contents are never logged.
    DEBUG("Saving token")
    try:
        expires_in = token.get("expires_in")
        if expires_in is not None:
            token["expires_at"] = int(time.time()) + expires_in
        with open(MS365_TOKEN_PATH, "w") as file:
            json.dump(token, file)
        DEBUG(f"Saved token to {MS365_TOKEN_PATH}")
    except Exception as e:
        # Best effort by design: the caller is not interrupted, only informed via the log.
        ERR(f"Failed to save token: {e}")
async def load_token():
    """Load the stored token dict from ``MS365_TOKEN_PATH``.

    Returns:
        The token dict, or None when the file is missing, is not valid JSON,
        or holds an empty value.
    """
    if not os.path.exists(MS365_TOKEN_PATH):
        ERR(f"No file found at {MS365_TOKEN_PATH}")
        return None

    try:
        with open(MS365_TOKEN_PATH, "r") as file:
            token = json.load(file)
    except FileNotFoundError:
        # The file can disappear between the exists() check and open().
        ERR("Token file not found.")
        return None
    except json.JSONDecodeError:
        ERR("Failed to decode token JSON")
        return None

    if not token:
        DEBUG("No token found.")
        return None

    # "expires_at" is deliberately left as stored by save_token. Recomputing it
    # here as now + expires_in would make every loaded token look fresh and
    # keep is_token_expired from ever reporting an expired token.
    # Token contents are secrets, so only the source path is logged.
    DEBUG(f"Loaded token from {MS365_TOKEN_PATH}")
    return token
async def is_token_expired(token):
    """Report whether *token* has passed its ``expires_at`` timestamp.

    A token without ``expires_at`` is treated as expired.
    """
    try:
        expiry_stamp = token["expires_at"]
    except KeyError:
        # No recorded expiry: err on the side of refreshing.
        return True
    return datetime.now() >= datetime.fromtimestamp(expiry_stamp)
async def is_token_expired2(token):
    """Check a bearer token by calling Microsoft Graph ``/me``.

    Args:
        token: The value placed after "Bearer " in the Authorization header.

    Returns:
        True when Graph answers with HTTP 401, False for any other status.
    """
    auth_headers = {"Authorization": f"Bearer {token}"}
    async with httpx.AsyncClient(timeout=timeout) as client:
        probe = await client.get("https://graph.microsoft.com/v1.0/me", headers=auth_headers)
    return probe.status_code == 401
async def get_new_token_with_refresh_token(refresh_token):
    """Exchange *refresh_token* for a new token at the MS365 token endpoint.

    Returns:
        The decoded token response when it contains ``access_token``,
        otherwise None.
    """
    form = {
        "client_id": MS365_CLIENT_ID,
        "client_secret": MS365_SECRET,
        "refresh_token": refresh_token,
        "grant_type": "refresh_token",
        "scope": " ".join(MS365_SCOPE),
    }
    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token", data=form)
    payload = response.json()

    if "access_token" not in payload:
        ERR("Failed to refresh access token")
        return None

    INFO("Access token refreshed successfully")
    return payload
async def refresh_token():
    """Refresh the stored Microsoft 365 token and persist the result.

    Returns:
        The refreshed token dict (earlier versions returned nothing, so
        callers that ignore the return value are unaffected).

    Raises:
        HTTPException: 401 when no token or no refresh token is stored;
            500 when the token endpoint does not return a new access token.
    """
    token = await load_token()
    if not token:
        ERR("No token found in storage")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="No token found",
        )

    if 'refresh_token' not in token:
        ERR("Refresh token not found in the loaded token")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Refresh token not found",
        )

    # Named differently from the function to avoid shadowing it locally.
    current_refresh_token = token['refresh_token']
    DEBUG("Found refresh token, attempting to refresh access token")

    new_token = await get_new_token_with_refresh_token(current_refresh_token)
    if not new_token:
        ERR("Failed to refresh token")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to refresh token",
        )

    # A token endpoint may omit a new refresh token; keep the current one in
    # that case so the next refresh is still possible.
    new_token.setdefault('refresh_token', current_refresh_token)
    await save_token(new_token)
    INFO("Token refreshed and saved successfully")
    return new_token
def get_calendar_ids() -> Dict[str, str]:
    """Return a mapping of calendar title to calendar identifier from EventKit.

    The mapping is also logged at INFO level.
    """
    store = EK.EKEventStore.alloc().init()
    calendar_identifiers = {}
    # 0 corresponds to EKEntityTypeEvent
    for cal in store.calendarsForEntityType_(0):
        calendar_identifiers[cal.title()] = cal.calendarIdentifier()
    INFO(f"{calendar_identifiers}")
    return calendar_identifiers
# Helper to convert datetime to NSDate
def datetime_to_nsdate(dt: datetime) -> NSDate:
    """Build an NSDate from the POSIX timestamp of *dt*."""
    epoch_seconds = dt.timestamp()
    return NSDate.dateWithTimeIntervalSince1970_(epoch_seconds)
@calendar.get("/events")
async def get_events_endpoint(start_date: str, end_date: str):
    """Return the events between *start_date* and *end_date* as JSON.

    Args:
        start_date: Start of the range, e.g. "2024-06-23".
        end_date: End of the range, in the same form.

    Raises:
        HTTPException: 400 when either date cannot be interpreted.
    """
    # Both bounds go through localize_dt only. The previous version parsed the
    # dates a second time with strptime, discarding one result and replacing
    # the localized end bound with a naive datetime.
    # NOTE(review): assumes localize_dt signals bad input with ValueError or
    # TypeError, as the callers in parse_calendar_for_day expect -- confirm.
    try:
        start_dt = localize_dt(start_date)
        end_dt = localize_dt(end_date)
    except (ValueError, TypeError) as e:
        ERR(f"Invalid date range: {start_date} to {end_date}, error: {e}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid start_date or end_date",
        ) from e

    response = await get_events(start_dt, end_dt)
    return JSONResponse(content=response, status_code=200)
async def get_events(start_dt: datetime, end_dt: datetime) -> List:
    """Collect events from the enabled calendar sources and normalize them.

    Microsoft 365 events are fetched when ``MS365_TOGGLE`` is set and macOS
    calendar events (restricted to ``ICALENDARS``) when ``ICAL_TOGGLE`` is set.
    The combined list is then passed to ``parse_calendar_for_day``.
    """
    gathered = []
    if MS365_TOGGLE:
        gathered += await get_ms365_events(start_dt, end_dt)
    if ICAL_TOGGLE:
        gathered += get_macos_calendar_events(start_dt, end_dt, ICALENDARS)
    return await parse_calendar_for_day(start_dt, end_dt, gathered)
def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]:
    """Fetch events from the macOS calendar store via EventKit.

    Args:
        start_date: Start of the range to query.
        end_date: End of the range to query.
        calendar_ids: Calendar identifiers to restrict the query to; when
            empty or None, all event calendars are searched.

    Returns:
        A list of event dicts, or an empty list when calendar access is not
        granted.
    """
    event_store = EK.EKEventStore.alloc().init()

    def request_access() -> bool:
        """Ask EventKit for event access, waiting up to 10 seconds for the answer."""
        answers = []
        answered = threading.Condition()

        def completion_handler(granted, error):
            if error is not None:
                ERR(f"Error: {error}")
            answers.append(granted)
            # Wake the thread waiting in request_access.
            with answered:
                answered.notify()

        with answered:
            # 0 corresponds to EKEntityTypeEvent
            event_store.requestAccessToEntityType_completion_(0, completion_handler)
            # Wait for the completion handler to be called
            answered.wait(timeout=10)

        # An empty list means the handler never ran within the timeout.
        if not answers:
            ERR("Request access timed out or failed")
            return False
        return answers[0]

    if not request_access():
        ERR("Access to calendar data was not granted")
        return []

    ns_start_date = datetime_to_nsdate(start_date)
    ns_end_date = datetime_to_nsdate(end_date)

    # Retrieve all event calendars (0 corresponds to EKEntityTypeEvent) and
    # narrow them down when specific identifiers were requested.
    all_calendars = event_store.calendarsForEntityType_(0)
    if calendar_ids:
        selected_calendars = [cal for cal in all_calendars if cal.calendarIdentifier() in calendar_ids]
    else:
        selected_calendars = all_calendars

    predicate = event_store.predicateForEventsWithStartDate_endDate_calendars_(ns_start_date, ns_end_date, selected_calendars)
    events = event_store.eventsMatchingPredicate_(predicate)

    def describe(event) -> Dict:
        """Flatten one EventKit event into the dict shape used by the other sources."""
        # event.attendees() may return None; only attendees with an email are kept.
        if event.attendees():
            attendees = [
                {'name': att.name(), 'email': att.emailAddress()}
                for att in event.attendees()
                if att.emailAddress()
            ]
        else:
            attendees = []
        return {
            "subject": event.title(),
            "id": event.eventIdentifier(),
            "start": event.startDate().descriptionWithLocale_(None),
            "end": event.endDate().descriptionWithLocale_(None),
            "bodyPreview": event.notes() or '',
            "attendees": attendees,
            "location": event.location() or '',
            "onlineMeetingUrl": '',  # Defaulting to empty as macOS EventKit does not provide this
            "showAs": 'busy',  # Default to 'busy'
            "isAllDay": event.isAllDay()
        }

    return [describe(event) for event in events]
async def get_ms365_events(start_date: datetime, end_date: datetime):
    """Fetch Microsoft 365 calendar events between two dates via Microsoft Graph.

    Args:
        start_date: First day of the range (queried from 00:00:00).
        end_date: Last day of the range (queried until 23:59:59).

    Returns:
        The list under "value" in the Graph response (empty if absent).

    Raises:
        HTTPException: 401 when no token is available; 500 when Graph does not
            answer with HTTP 200.
    """
    token = await load_token()
    if not token:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Access token not found",
        )
    if await is_token_expired(token):
        await refresh_token()
        # refresh_token() persists the new token; reload it so this request
        # does not go out with the expired access token.
        token = await load_token()
        if not token:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Access token not found",
            )

    def as_day(value) -> str:
        # Interpolating a datetime directly would embed its time (and offset)
        # in front of the literal "T00:00:00" and produce a malformed filter.
        # Values that are not datetimes are passed through as before.
        return value.strftime("%Y-%m-%d") if isinstance(value, datetime) else str(value)

    # this looks like it might need updating to use tz-aware datetimes converted to UTC...
    graph_url = f"https://graph.microsoft.com/v1.0/me/events?$filter=start/dateTime ge '{as_day(start_date)}T00:00:00' and end/dateTime le '{as_day(end_date)}T23:59:59'"
    headers = {
        "Authorization": f"Bearer {token['access_token']}",
        "Prefer": 'outlook.timezone="Pacific Standard Time"',
    }

    # Use the module-wide timeout, like the other Graph calls in this file.
    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.get(graph_url, headers=headers)

    if response.status_code != 200:
        ERR("Failed to retrieve events from Microsoft 365")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to retrieve events",
        )

    ms_events = response.json().get("value", [])
    return ms_events
async def parse_calendar_for_day(range_start: datetime, range_end: datetime, events: List[Dict[str, Any]]):
    """Normalize raw events and keep those overlapping the given range.

    Args:
        range_start: Start of the range; passed through ``localize_dt``.
        range_end: End of the range; passed through ``localize_dt``.
        events: Raw event dicts whose 'start'/'end' are either strings or
            dicts carrying a 'dateTime' entry.

    Returns:
        A list of simplified event dicts with 'HH:MM' start and end times.
    """
    range_start = localize_dt(range_start)
    range_end = localize_dt(range_end)
    event_list = []

    for event in events:
        INFO(f"Event: {event}")

        # 'start'/'end' are either plain values or dicts holding 'dateTime'.
        start_str = event.get('start')
        end_str = event.get('end')
        if isinstance(start_str, dict):
            start_str = start_str.get('dateTime')
        else:
            INFO(f"Start date string not a dict")
        if isinstance(end_str, dict):
            end_str = end_str.get('dateTime')
        else:
            INFO(f"End date string not a dict")

        try:
            start_date = localize_dt(start_str) if start_str else None
        except (ValueError, TypeError) as e:
            ERR(f"Invalid start date format: {start_str}, error: {e}")
            continue
        try:
            end_date = localize_dt(end_str) if end_str else None
        except (ValueError, TypeError) as e:
            ERR(f"Invalid end date format: {end_str}, error: {e}")
            continue

        DEBUG(f"Comparing {start_date} with range {range_start} to {range_end}")

        if not start_date:
            ERR(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
            continue

        # Ensure start_date is timezone-aware; a missing end falls back to the start.
        start_date = localize_dt(start_date)
        end_date = localize_dt(end_date) if end_date else start_date

        # Skip events that do not overlap the requested range.
        overlaps = (start_date < range_end) and (end_date > range_start)
        if not overlaps:
            DEBUG(f"Event outside of specified range: {start_date} to {end_date}")
            continue

        # Only attendee entries carrying both 'name' and 'email' are kept.
        attendees = [
            {'name': att['name'], 'email': att['email']}
            for att in event.get('attendees', [])
            if 'name' in att and 'email' in att
        ]

        # A dict location is reduced to its 'displayName'.
        location = event.get('location', '')
        if isinstance(location, dict):
            location = location.get('displayName', '')

        busy_status = event.get('showAs', '')
        event_data = {
            "name": event.get('subject', ''),
            "uid": event.get('id', ''),
            "start": start_date.strftime('%H:%M'),
            "end": end_date.strftime('%H:%M') if end_date else '',
            "description": event.get('bodyPreview', ''),
            "attendees": attendees,
            "location": location,
            "url": event.get('onlineMeetingUrl', ''),
            "busystatus": busy_status,
            "busy": busy_status in ['busy', 'tentative'],
            "all_day": event.get('isAllDay', False)
        }
        INFO(f"Event_data: {event_data}")
        event_list.append(event_data)

    return event_list

209
sijapi/routers/cf.py Normal file
View file

@ -0,0 +1,209 @@
'''
IN DEVELOPMENT - Cloudflare + Caddy module. Based on a bash script that's able to rapidly deploy new Cloudflare subdomains on new Caddy reverse proxy configurations, managing everything including restarting Caddy. The Python version needs more testing before actual use.
'''
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from fastapi.responses import PlainTextResponse, JSONResponse
from typing import Optional
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP
import httpx
import asyncio
from asyncio import sleep
import os
# Router on which this module's Cloudflare/Caddy endpoints are registered.
cf = APIRouter()
class DNSRecordRequest(BaseModel):
    # Request body for /cf/add_config.

    # Fully qualified name to configure, e.g. "sub.example.com".
    full_domain: str
    # Upstream host for the reverse proxy; add_config uses "localhost" when unset.
    ip: Optional[str] = None
    # Upstream port for the reverse proxy.
    port: str
# Asynchronous lookup of the Cloudflare zone ID for a domain
async def get_zone_id(domain: str) -> str:
    """Return the ID of the first Cloudflare zone named *domain*.

    Raises:
        ValueError: When the API reports errors or no zone matches.
        httpx.HTTPStatusError: When the API answers with an error status.
    """
    async with httpx.AsyncClient() as client:
        response = await client.get(
            f"{CF_API_BASE_URL}/zones",
            headers={
                "Authorization": f"Bearer {CF_TOKEN}",
                "Content-Type": "application/json"
            },
            params={"name": domain},
        )
        response.raise_for_status()
        payload = response.json()

    if not payload['success']:
        errors = ', '.join(err['message'] for err in payload['errors'])
        raise ValueError(f"Cloudflare API returned errors: {errors}")

    matches = payload['result']
    if not matches:
        raise ValueError(f"No Zone ID found for domain '{domain}'")
    return matches[0]['id']
async def update_caddyfile(full_domain, caddy_ip, port):
    """Append a reverse-proxy site block to the Caddyfile and restart Caddy.

    Args:
        full_domain: Site address for the new block.
        caddy_ip: Upstream host to proxy to.
        port: Upstream port to proxy to.
    """
    # NOTE(review): the values are written into the Caddyfile verbatim; callers
    # should only pass trusted input.
    # NOTE(review): this renders `dns cloudflare {"$CLOUDFLARE_API_TOKEN"}` --
    # confirm that Caddy resolves the token from this form as intended.
    caddy_config = f"""
{full_domain} {{
reverse_proxy {caddy_ip}:{port}
tls {{
dns cloudflare {{"$CLOUDFLARE_API_TOKEN"}}
}}
}}
"""
    with open(CADDYFILE_PATH, 'a') as file:
        file.write(caddy_config)

    # Using asyncio to create subprocess
    proc = await asyncio.create_subprocess_exec("sudo", "systemctl", "restart", "caddy")
    await proc.communicate()
    # A failed restart means the new block is not being served; make that
    # visible in the log instead of ignoring the exit status.
    if proc.returncode != 0:
        ERR(f"Restarting caddy failed with exit code {proc.returncode}")
# Retry mechanism for API calls
async def retry_request(url, headers, max_retries=5, backoff_factor=1):
    """GET *url*, retrying with exponential backoff on HTTP/transport errors.

    Args:
        url: URL to request.
        headers: Headers to send with each attempt.
        max_retries: Maximum number of attempts.
        backoff_factor: Base delay in seconds; attempt k waits
            ``backoff_factor * 2**k`` before the next try.

    Returns:
        The first response with a successful status.

    Raises:
        HTTPException: 500 once every attempt has failed.
    """
    for retry in range(max_retries):
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(url, headers=headers)
                response.raise_for_status()
                return response
        except httpx.HTTPError as e:  # base class; also covers ConnectTimeout
            attempts_left = max_retries - (retry + 1)
            if attempts_left <= 0:
                # No further attempt follows, so do not sleep before giving up.
                ERR(f"Request failed: {e}. No retries left.")
                break
            ERR(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
            await sleep(backoff_factor * (2 ** retry))
    raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request")
# Helper function to load Caddyfile domains
def load_caddyfile_domains():
    """Return the site addresses that open top-level blocks in the Caddyfile.

    Only lines at brace depth zero count, so nested directive blocks (such as
    ``tls {``) and brace placeholders inside a site block are not reported.
    Blank lines and comment lines (including indented ones) are skipped, as is
    a block with no address in front of its opening brace.

    Returns:
        List of address strings in file order.
    """
    with open(CADDYFILE_PATH, 'r') as file:
        caddyfile_content = file.read()

    domains = []
    depth = 0  # current brace nesting level
    for raw_line in caddyfile_content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if depth == 0 and "{" in line:
            domain = line.split("{")[0].strip()
            if domain:
                domains.append(domain)
        # Track nesting; clamp at zero so a stray '}' cannot hide later sites.
        depth = max(0, depth + line.count("{") - line.count("}"))
    return domains
# Endpoint to add new configuration to Cloudflare, Caddyfile, and cf_domains.json
@cf.post("/cf/add_config")
async def add_config(record: DNSRecordRequest):
    """Create a Cloudflare A record for a domain and add a matching Caddy site.

    Args:
        record: Requested domain plus the upstream host/port for the proxy.

    Returns:
        A confirmation message.

    Raises:
        HTTPException: 400 when the zone cannot be resolved or Cloudflare
            rejects the record.
    """
    full_domain = record.full_domain
    caddy_ip = record.ip or "localhost"
    port = record.port

    # Extract subdomain and domain: a two-label name is treated as the zone
    # apex ("@"); otherwise the first label is the subdomain.
    parts = full_domain.split(".")
    if len(parts) == 2:
        domain = full_domain
        subdomain = "@"
    else:
        subdomain = parts[0]
        domain = ".".join(parts[1:])

    # get_zone_id reports lookup failures with ValueError; turn that into the
    # 400 this endpoint intends instead of an unhandled server error.
    try:
        zone_id = await get_zone_id(domain)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Zone ID for {domain} could not be found: {e}") from e
    if not zone_id:
        raise HTTPException(status_code=400, detail=f"Zone ID for {domain} could not be found")

    # API call setup for Cloudflare A record
    endpoint = f"{CF_API_BASE_URL}/zones/{zone_id}/dns_records"
    headers = {
        "Authorization": f"Bearer {CF_TOKEN}",
        "Content-Type": "application/json"
    }
    data = {
        "type": "A",
        "name": subdomain,
        "content": CF_IP,
        "ttl": 120,
        "proxied": True
    }

    async with httpx.AsyncClient() as client:
        response = await client.post(endpoint, headers=headers, json=data)

    result = response.json()
    if not result.get("success", False):
        # "errors" may be missing or empty; fall back to a blank entry so the
        # lookup below cannot raise IndexError.
        first_error = (result.get("errors") or [{}])[0]
        error_message = first_error.get("message", "Unknown error")
        error_code = first_error.get("code", "Unknown code")
        raise HTTPException(status_code=400, detail=f"Failed to create A record: {error_message} (Code: {error_code})")

    # Update Caddyfile
    await update_caddyfile(full_domain, caddy_ip, port)
    return {"message": "Configuration added successfully"}
@cf.get("/cf/list_zones")
async def list_zones_endpoint():
    """Return the zone/record mapping built by ``list_zones`` as JSON."""
    return JSONResponse(await list_zones())
async def list_zones():
    """Build a mapping of Cloudflare zones and their DNS record IDs.

    Returns:
        ``{zone_name: {"zone_id": ..., record_name: {"dns_id": ...}, ...}}``
        where each record name has the ``.<zone_name>`` suffix removed.

    Raises:
        HTTPException: 400 when the zone list or a zone's records cannot be
            retrieved successfully.
    """
    auth_headers = {
        "Authorization": f"Bearer {CF_TOKEN}",
        "Content-Type": "application/json"
    }

    async with httpx.AsyncClient() as client:  # async http call
        zones_response = await client.get(f"{CF_API_BASE_URL}/zones", headers=auth_headers)
        zones_response.raise_for_status()
        zones_payload = zones_response.json()

    if not zones_payload.get("success"):
        raise HTTPException(status_code=400, detail="Failed to retrieve zones from Cloudflare")

    domains = {}
    for zone in zones_payload.get("result", []):
        zone_id = zone.get("id")
        zone_name = zone.get("name")
        zone_entry = {"zone_id": zone_id}
        domains[zone_name] = zone_entry

        async with httpx.AsyncClient() as client:  # async http call
            records_response = await client.get(
                f"{CF_API_BASE_URL}/zones/{zone_id}/dns_records", headers=auth_headers
            )
            records_payload = records_response.json()

        if not records_payload.get("success"):
            raise HTTPException(status_code=400, detail=f"Failed to retrieve DNS records for zone {zone_name}")

        # Record names are stored next to "zone_id", keyed by the name with the
        # ".<zone_name>" suffix removed.
        for record in records_payload.get("result", []):
            record_id = record.get("id")
            short_name = record.get("name").replace(f".{zone_name}", "")
            zone_entry.setdefault(short_name, {})["dns_id"] = record_id

    return domains
@cf.get("/cf/compare_caddy", response_class=PlainTextResponse)
async def crossreference_caddyfile():
    """Compare Cloudflare DNS record names with the sites in the Caddyfile.

    Returns:
        A Markdown report listing the names found on only one side.
    """
    cf_domains_data = await list_zones()
    caddyfile_domains = load_caddyfile_domains()

    cf_domains_set = set()
    for domain, data in cf_domains_data.items():
        subdomains = data.get("subdomains")
        if subdomains is None:
            # list_zones() stores each record name directly in the zone entry,
            # next to "zone_id", so there is no "subdomains" key to read;
            # reading only that key left this side of the comparison empty.
            subdomains = {key: value for key, value in data.items() if key != "zone_id"}
        for sub in subdomains:
            # The apex appears either as "@" or as the zone name itself.
            if sub in ("@", domain):
                cf_domains_set.add(domain)
            else:
                cf_domains_set.add(f"{sub}.{domain}")

    caddyfile_domains_set = set(caddyfile_domains)
    only_in_caddyfile = caddyfile_domains_set - cf_domains_set
    only_in_cf_domains = cf_domains_set - caddyfile_domains_set

    # Sort the names so the report is stable between calls.
    markdown_output = "# Cross-reference cf_domains.json and Caddyfile\n\n"
    markdown_output += "## Domains only in Caddyfile:\n\n"
    for domain in sorted(only_in_caddyfile):
        markdown_output += f"- **{domain}**\n"
    markdown_output += "\n## Domains only in cf_domains.json:\n\n"
    for domain in sorted(only_in_cf_domains):
        markdown_output += f"- **{domain}**\n"
    return markdown_output

253
sijapi/routers/email.py Normal file
View file

@ -0,0 +1,253 @@
'''
IN DEVELOPMENT Email module. Uses IMAP and SMTP login credentials to monitor an inbox and summarize incoming emails that match certain criteria and save the Text-To-Speech converted summaries into a specified "podcast" folder.
UNIMPLEMENTED: AI auto-responder.
'''
from fastapi import APIRouter
import asyncio
from imbox import Imbox
from bs4 import BeautifulSoup
import os
from pathlib import Path
from shutil import move
import tempfile
import re
import ssl
from smtplib import SMTP_SSL
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from datetime import datetime as dt_datetime
from pydantic import BaseModel
from typing import List, Optional, Any
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOME_DIR, DATA_DIR, OBSIDIAN_VAULT_DIR, PODCAST_DIR, IMAP, OBSIDIAN_JOURNAL_DIR, DEFAULT_VOICE, AUTORESPONSE_BLACKLIST, AUTORESPONSE_WHITELIST, AUTORESPONSE_CONTEXT, USER_FULLNAME, USER_BIO, AUTORESPOND, TZ
from sijapi.routers import summarize, tts, llm
from sijapi.utilities import clean_text, assemble_journal_path, localize_dt, extract_text, prefix_lines
# Router object for this module; everything registered on it is tagged "private".
email = APIRouter(tags=["private"])
class Contact(BaseModel):
    """An email participant: an address plus a display name."""
    # Email address of the participant.
    email: str
    # Display name; process_unread_emails passes '' when the message has none.
    name: str
class EmailModel(BaseModel):
    """Normalized view of one received email as used throughout this module."""
    # Address of the sender (taken from the first "from" entry of the message).
    sender: str
    # Everyone the message was addressed to.
    recipients: List[Contact]
    # When the message was received, after localize_dt() has been applied.
    datetime_received: dt_datetime
    subject: str
    # Message text with markup removed and whitespace collapsed.
    body: str
    # Raw attachment objects as delivered by the IMAP client, if any.
    attachments: Optional[List[Any]] = None
def imap_conn():
    """Build an Imbox client from the IMAP settings object.

    The encryption mode is read from ``IMAP.imap_encryption``: 'SSL' enables
    implicit TLS, 'STARTTLS' enables the upgrade handshake, and any other
    value leaves both switched off.
    """
    encryption = IMAP.imap_encryption
    return Imbox(
        IMAP.host,
        username=IMAP.email,
        password=IMAP.password,
        port=IMAP.imap_port,
        ssl=(encryption == 'SSL'),
        starttls=(encryption == 'STARTTLS'),
    )
def clean_email_content(html_content):
    """Strip markup from ``html_content`` and collapse whitespace runs.

    Returns the text content with every run of spaces, tabs and line breaks
    replaced by a single space, and leading/trailing whitespace removed.
    """
    text = BeautifulSoup(html_content, "html.parser").get_text()
    collapsed = re.sub(r'[ \t\r\n]+', ' ', text)
    return collapsed.strip()
async def extract_attachments(attachments) -> List[str]:
    """Extract the text of every attachment via a temporary file.

    Each attachment is written to a named temporary file carrying the
    attachment's (lower-cased) extension, or '.txt' when it has none, handed
    to ``extract_text`` and then deleted again, even if extraction fails.

    Args:
        attachments: Iterable of mapping-like attachments exposing
            'filename' (optional) and 'content' (object with ``getvalue()``).

    Returns:
        One extracted text per attachment, in input order.
    """
    texts: List[str] = []
    for item in attachments:
        suffix = os.path.splitext(item.get('filename', 'tempfile.txt'))[1].lower() or '.txt'

        # delete=False: the file must outlive the handle so extract_text can reopen it.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
            handle.write(item['content'].getvalue())
            scratch_path = handle.name

        try:
            texts.append(await extract_text(scratch_path))
        finally:
            if os.path.exists(scratch_path):
                os.remove(scratch_path)
    return texts
async def process_unread_emails(auto_respond: bool = AUTORESPOND, summarize_emails: bool = True, podcast: bool = True):
    """Poll the inbox forever and archive every unread message.

    Each unread message is converted to an ``EmailModel`` and saved as a
    markdown note. Optionally it is also summarized and read out via TTS
    (``summarize_emails``), copied to the podcast directory (``podcast``),
    and answered automatically (``auto_respond``). The message is marked as
    seen afterwards. The loop sleeps 30 seconds between polls and after any
    error.

    Args:
        auto_respond: Send an auto-response when ``should_auto_respond``
            approves the message.
        summarize_emails: Generate a summary and a TTS rendition.
        podcast: Copy the TTS audio to ``PODCAST_DIR`` when it exists.
    """
    while True:
        try:
            with imap_conn() as inbox:
                unread_messages = inbox.messages(unread=True)
                for uid, message in unread_messages:
                    recipients = [Contact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to]

                    # Prefer the HTML part, fall back to plain text, and use an
                    # empty body when the message carries neither (indexing
                    # [0] on an empty part list used to raise IndexError).
                    html_parts = message.body.get('html') or []
                    plain_parts = message.body.get('plain') or []
                    if html_parts:
                        body = clean_email_content(html_parts[0])
                    elif plain_parts:
                        body = clean_email_content(plain_parts[0])
                    else:
                        body = ""

                    this_email = EmailModel(
                        sender=message.sent_from[0]['email'],
                        datetime_received=localize_dt(message.date),
                        recipients=recipients,
                        subject=message.subject,
                        body=body,
                        attachments=message.attachments
                    )
                    DEBUG(f"\n\nProcessing email: {this_email.subject}\n\n")
                    md_path, md_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".md")
                    tts_path, tts_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".wav")

                    if summarize_emails:
                        email_content = f'At {this_email.datetime_received}, {this_email.sender} sent an email with the subject line "{this_email.subject}". The email in its entirety reads: \n\n{this_email.body}\n"'
                        if this_email.attachments:
                            attachment_texts = await extract_attachments(this_email.attachments)
                            email_content += "\n—--\n" + "\n—--\n".join([f"Attachment: {text}" for text in attachment_texts])
                        summary = await summarize.summarize_text(email_content)
                        await tts.local_tts(text_content = summary, speed = 1.1, voice = DEFAULT_VOICE, podcast = podcast, output_path = tts_path)
                        if podcast:
                            if PODCAST_DIR.exists():
                                tts.copy_to_podcast_dir(tts_path)
                            else:
                                ERR(f"PODCAST_DIR does not exist: {PODCAST_DIR}")
                        save_email_as_markdown(this_email, summary, md_path, tts_relative)
                    else:
                        save_email_as_markdown(this_email, None, md_path, None)

                    if auto_respond and should_auto_respond(this_email):
                        DEBUG(f"Auto-responding to {this_email.subject}")
                        auto_response_subject = 'Auto-Response Re:' + this_email.subject
                        auto_response_body = await generate_auto_response_body(this_email)
                        DEBUG(f"Auto-response: {auto_response_body}")
                        await send_auto_response(this_email.sender, auto_response_subject, auto_response_body)

                    inbox.mark_seen(uid)

            await asyncio.sleep(30)
        except Exception as e:
            # Top-level boundary of a background task: log and keep polling.
            ERR(f"An error occurred: {e}")
            await asyncio.sleep(30)
def save_email_as_markdown(email: EmailModel, summary: Optional[str], md_path: Path, tts_path: Optional[Path]):
    '''
    Saves an email as a markdown note at the given path.

    Args:
        email (EmailModel): The email object containing email details.
        summary (Optional[str]): The summary of the email; when None or
            empty, no summary callout is written.
        md_path (Path): Destination of the markdown file.
        tts_path (Optional[Path]): Path of the text-to-speech audio file to
            embed; when None or empty, no embed is written.
    '''
    received = email.datetime_received
    # Recipients are Contact models; link their address like the sender's
    # instead of embedding the model's repr in the note.
    recipient_links = ', '.join(f'**[[{recipient.email}]]**' for recipient in email.recipients)

    # Create the markdown content
    markdown_content = f'''---
date: {received.strftime('%Y-%m-%d')}
tags:
- email
---
| | | |
| --: | :--: | :--: |
| *received* | **{received.strftime('%B %d, %Y at %H:%M:%S %Z')}** | |
| *from* | **[[{email.sender}]]** | |
| *to* | {recipient_links} | |
| *subject* | **{email.subject}** | |
'''

    if summary:
        # Only quote-prefix a summary that exists; callers pass None when
        # summarization is disabled.
        summary = prefix_lines(summary, '> ')
        markdown_content += f'''
> [!summary] Summary
> {summary}
'''

    if tts_path:
        markdown_content += f'''
![[{tts_path}]]
'''

    markdown_content += f'''
---
{email.body}
'''

    with open(md_path, 'w', encoding='utf-8') as md_file:
        md_file.write(markdown_content)
    DEBUG(f"Saved markdown to {md_path}")
# System prompt passed to the LLM by generate_auto_response_body().
AUTORESPONSE_SYS = "You are a helpful AI assistant that generates personalized auto-response messages to incoming emails."
async def generate_auto_response_body(e: EmailModel, response_style: str = "professional") -> str:
    """Ask the LLM to draft an auto-response to email ``e``.

    Args:
        e: The email being answered.
        response_style: Tone requested from the model.

    Returns:
        The model's reply, or a fixed apology text when the query fails.
    """
    age = dt_datetime.now(TZ) - e.datetime_received
    prompt = f'''
Please generate a personalized auto-response to the following email. The email is from {e.sender} and was sent {age} ago with the subject line "{e.subject}." You are auto-responding on behalf of {USER_FULLNAME}, who is described by the following short bio (strictly for your context -- do not recite this in the response): "{USER_BIO}." {USER_FULLNAME} is unable to respond himself, because {AUTORESPONSE_CONTEXT}. Everything from here to ~~//END//~~ is the email body.
{e.body}
~~//END//~~
Keep your auto-response {response_style} and to the point, but do aim to make it responsive specifically to the sender's inquiry.
'''
    try:
        return await llm.query_ollama(prompt, AUTORESPONSE_SYS, 400)
    except Exception as err:
        # Named ``err`` so the handler does not rebind the ``e`` parameter.
        ERR(f"Error generating auto-response: {str(err)}")
        return "Thank you for your email. Unfortunately, an error occurred while generating the auto-response. We apologize for any inconvenience."
async def send_auto_response(to_email, subject, body):
    """Send a plain-text auto-response over SMTP-over-SSL.

    Args:
        to_email: Recipient address.
        subject: Subject line of the response.
        body: Plain-text body of the response.

    Raises:
        Exception: Whatever building or sending the message raised; the
            failure is logged once and re-raised with its original traceback.
    """
    try:
        message = MIMEMultipart()
        message['From'] = IMAP.email  # smtp_username
        message['To'] = to_email
        message['Subject'] = subject
        message.attach(MIMEText(body, 'plain'))

        DEBUG(f"Initiating attempt to send auto-response via SMTP at {IMAP.host}:{IMAP.smtp_port}...")
        # NOTE(security): certificate verification is disabled here, so the
        # login below is exposed to interception unless the SMTP host is
        # local/trusted. Switch to ssl.create_default_context() once the
        # server presents a verifiable certificate.
        context = ssl._create_unverified_context()
        with SMTP_SSL(IMAP.host, IMAP.smtp_port, context=context) as server:
            server.login(IMAP.email, IMAP.password)
            DEBUG(f"Successfully logged in to {IMAP.host} at {IMAP.smtp_port} as {IMAP.email}. Attempting to send email now.")
            server.send_message(message)
        INFO(f"Auto-response sent to {to_email} concerning {subject}")
    except Exception as e:
        ERR(f"Failed to send auto-response email to {to_email}: {e}")
        raise
def should_auto_respond(email: EmailModel) -> bool:
    """Decide whether ``email`` qualifies for an auto-response.

    List entries containing '@' are matched (case-sensitively) as substrings
    of the sender address; all other entries are matched case-insensitively
    against the subject and body. With a non-empty whitelist the email must
    match it; in every case a blacklist match vetoes the response.
    """
    def matches_list(item: str, email: EmailModel) -> bool:
        if '@' in item:
            return item in email.sender
        needle = item.lower()
        return needle in email.subject.lower() or needle in email.body.lower()

    def hits_any(entries) -> bool:
        return any(matches_list(entry, email) for entry in entries)

    if AUTORESPONSE_WHITELIST and not hits_any(AUTORESPONSE_WHITELIST):
        return False
    return not (AUTORESPONSE_BLACKLIST and hits_any(AUTORESPONSE_BLACKLIST))
# Strong references to fire-and-forget tasks: the event loop only keeps weak
# references, so an otherwise unreferenced task may be garbage-collected
# while it is still running.
_background_tasks = set()


@email.on_event("startup")
async def startup_event():
    """Start the inbox polling loop as a background task at startup."""
    task = asyncio.create_task(process_unread_emails())
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)

66
sijapi/routers/health.py Normal file
View file

@ -0,0 +1,66 @@
'''
Health check module. /health returns `'status': 'ok'`, /id returns TS_ID, /routers responds with a list of the active routers, /ip responds with the device's local IP, /ts_ip responds with its tailnet IP, and /wan_ip responds with WAN IP.
Depends on:
TS_ID, ROUTERS, LOGGER, SUBNET_BROADCAST
'''
import os
import httpx
import socket
from fastapi import APIRouter
from tailscale import Tailscale
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import TS_ID, ROUTERS, SUBNET_BROADCAST
# Router for the health endpoints; tagged "public", "trusted" and "private".
health = APIRouter(tags=["public", "trusted", "private"])
@health.get("/health")
def get_health():
    """Liveness probe: always answers with a fixed OK payload."""
    payload = {"status": "ok"}
    return payload
@health.get("/id")
def get_health() -> str:
    """Return this node's identifier (``TS_ID``)."""
    # NOTE(review): this reuses the name of the /health handler defined just
    # above, so the module attribute ``get_health`` ends up bound to this
    # function. Both routes stay registered; consider renaming (e.g. get_id)
    # in a change that can also update any importers.
    node_id = TS_ID
    return node_id
@health.get("/routers")
def get_routers() -> str:
    """Return the entries of ``ROUTERS`` as one comma-separated string."""
    return ", ".join(ROUTERS)
@health.get("/ip")
def get_local_ip():
    """Get the server's local IP address.

    Connects a UDP socket towards ``SUBNET_BROADCAST`` and reports the local
    address the socket was bound to; falls back to '127.0.0.1' on any error.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        probe.connect((f'{SUBNET_BROADCAST}', 1))
        return probe.getsockname()[0]
    except Exception:
        return '127.0.0.1'
    finally:
        probe.close()
@health.get("/wan_ip")
async def get_wan_ip():
    """Get the WAN IP address using Mullvad's API.

    Returns the 'ip' field of the JSON answer, or "Unavailable" when the
    field is missing or the request fails (the failure is logged).
    """
    async with httpx.AsyncClient() as http:
        try:
            reply = await http.get('https://am.i.mullvad.net/json')
            reply.raise_for_status()
            return reply.json().get('ip', 'Unavailable')
        except Exception as exc:
            ERR(f"Error fetching WAN IP: {exc}")
            return "Unavailable"
@health.get("/ts_ip")
async def get_tailscale_ip():
    """Get the Tailscale IP address of the first device in the tailnet.

    Reads the ``TAILNET`` and ``TAILSCALE_API_KEY`` environment variables,
    lists the tailnet's devices and returns the first address of the first
    device.

    Returns:
        The address string, "No devices found" when the tailnet has no
        devices, or "No addresses found" when the first device has none.
    """
    tailnet = os.getenv("TAILNET")
    api_key = os.getenv("TAILSCALE_API_KEY")
    async with Tailscale(tailnet=tailnet, api_key=api_key) as tailscale:
        devices = await tailscale.devices()

    if not devices:
        return "No devices found"

    # The client may hand back a mapping keyed by device id (where indexing
    # with 0 raises KeyError) or a plain sequence; accept both.
    # NOTE(review): "first device" is not necessarily this host.
    if isinstance(devices, dict):
        first_device = next(iter(devices.values()))
    else:
        first_device = devices[0]

    # Likewise a device may be a mapping or an object with attributes.
    if isinstance(first_device, dict):
        addresses = first_device.get('addresses')
    else:
        addresses = getattr(first_device, 'addresses', None)

    if not addresses:
        return "No addresses found"
    return addresses[0]

358
sijapi/routers/hooks.py Normal file
View file

@ -0,0 +1,358 @@
'''
Webhook module for specific use cases.
Depends on:
LOGGER, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, MAC_UN, MAC_PW, MAC_ID, TS_TAILNET
'''
from fastapi import APIRouter, Request, BackgroundTasks, HTTPException, status
from fastapi.responses import JSONResponse, RedirectResponse
import httpx
import json
from pathlib import Path
import asyncio
from datetime import datetime
import os, io
from PyPDF2 import PdfReader
import aiohttp
import paramiko
import time
import subprocess
from pydantic import BaseModel
from typing import List, Optional
import requests
import base64
from hashlib import sha256
# from O365 import Account, FileSystemTokenBackend
from typing import List
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, MAC_UN, MAC_PW, MAC_ID, TS_TAILNET
# Router that carries the webhook endpoints defined in this module.
hooks = APIRouter()
# Load the case lookup table once at import time. cl_case_details() looks
# dockets up in it by string key and reads "code" and "shortname" from the
# matching entry. A missing or malformed file makes the module import fail.
with open(CASETABLE_PATH, 'r') as file:
    CASETABLE = json.load(file)
class WidgetUpdate(BaseModel):
    """Request body for /update_widget; every field is optional.

    shellfish_update_widget() turns each non-empty field into a flag/value
    pair on the ``widget`` command line.
    """
    text: Optional[str] = None      # passed as --text
    progress: Optional[str] = None  # passed as --progress
    icon: Optional[str] = None      # passed as --icon
    color: Optional[str] = None     # passed as --color
    url: Optional[str] = None       # passed as --url
    shortcut: Optional[str] = None  # passed as --shortcut
    graph: Optional[str] = None     # passed as an additional --text value
@hooks.get("/health_check")
def hook_health():
    """Run the ShellFish health check and return its result.

    Returns:
        The dict produced by ``shellfish_health_check`` (previously the
        result was dropped and the endpoint answered with null).
    """
    return shellfish_health_check()
@hooks.post("/update_widget")
def hook_widget_update(update: WidgetUpdate):
    """Apply a widget update and return the widget command's output.

    Args:
        update: Fields to set on the widget.

    Returns:
        The dict produced by ``shellfish_update_widget`` (previously the
        result was dropped and the endpoint answered with null).
    """
    return shellfish_update_widget(update)
@hooks.get("/alert")
async def hook_alert(request: Request):
    """Forward the ``alert`` query parameter to ``notify``.

    Raises:
        HTTPException: 400 when the parameter is missing or empty.
    """
    alert = request.query_params.get('alert')
    if alert:
        return await notify(alert)
    raise HTTPException(status_code=400, detail='No alert provided.')
@hooks.post("/alert/cd")
async def hook_changedetection(webhook_data: dict):
    """Handle a change-detection webhook.

    When the message contains one of the watched words, the full payload is
    stored under ``ALERTS_DIR`` and a notification is sent.

    Args:
        webhook_data: Parsed JSON body; the message is read from
            ``body.message``.

    Returns:
        ``{"status": "received"}``.

    Raises:
        HTTPException: Propagated from ``notify`` when delivery fails.
    """
    body = webhook_data.get("body", {})
    message = body.get("message", "")
    if message and any(word in message.split() for word in ["SPI", "sierra", "pacific"]):
        filename = ALERTS_DIR / f"alert_{int(time.time())}.json"
        filename.write_text(json.dumps(webhook_data, indent=4))
        # notify() is a coroutine function; without ``await`` it never ran.
        await notify(message)
    return {"status": "received"}
@hooks.post("/cl/search")
async def hook_cl_search(request: Request, background_tasks: BackgroundTasks):
    """Receive a CourtListener search webhook.

    The payload is dumped to a timestamped file in ``LOGS_DIR`` and every
    result is queued for background processing.
    """
    client_ip = request.client.host
    DEBUG(f"Received request from IP: {client_ip}")
    data = await request.json()
    payload = data['payload']
    results = payload['results']

    # Save the payload data
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    dump_path = LOGS_DIR / f"{stamp}-{client_ip}_search.json"
    with open(dump_path, 'w') as dump:
        json.dump(payload, dump, indent=2)

    for item in results:
        background_tasks.add_task(cl_search_process_result, item)
    return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
@hooks.post("/cl/docket")
async def hook_cl_docket(request: Request, background_tasks: BackgroundTasks):
    """Receive a CourtListener docket webhook and queue its results.

    ``cl_docket`` requires the request's ``BackgroundTasks`` instance, so it
    is injected here (as in ``hook_cl_search``) and passed along; the call
    previously omitted it and failed with a TypeError.

    Returns:
        The response built by ``cl_docket``.
    """
    client_ip = request.client.host
    DEBUG(f"Received request from IP: {client_ip}")
    data = await request.json()
    return await cl_docket(data, client_ip, background_tasks)
async def notify(alert: str):
    """Deliver ``alert`` through ShellFish and a desktop notification.

    The desktop notification is shown locally when this node is the Mac
    (``TS_ID == MAC_ID``) and over SSH otherwise.

    Returns:
        ``{"message": alert}``.

    Raises:
        HTTPException: 500 when any delivery step fails.
    """
    try:
        await notify_shellfish(alert)
        if TS_ID != MAC_ID:
            await notify_remote(f"{MAC_ID}.{TS_TAILNET}.net", alert, MAC_UN, MAC_PW)
        else:
            await notify_local(alert)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to send alert: {str(exc)}")
    return {"message": alert}
async def notify_local(message: str):
    """Show ``message`` as a desktop notification via ``osascript``.

    The message can originate from webhook input, so it is escaped for the
    AppleScript string literal and ``osascript`` is invoked with an argument
    list rather than through a shell.

    Raises:
        OSError: When ``osascript`` cannot be executed.
    """
    # Inside an AppleScript double-quoted string only backslash and the
    # double quote need escaping.
    escaped = message.replace('\\', '\\\\').replace('"', '\\"')
    script = f'display notification "{escaped}" with title "Notification Title"'
    await asyncio.to_thread(subprocess.run, ["osascript", "-e", script], check=False)
# Asynchronous remote notification using paramiko SSH
async def notify_remote(host: str, message: str, username: str = None, password: str = None, key_filename: str = None):
    """Show ``message`` as a desktop notification on a remote host over SSH.

    Args:
        host: Host to connect to.
        message: Notification text; escaped before it reaches the remote shell.
        username: SSH user name.
        password: SSH password, used when ``key_filename`` is not given.
        key_filename: Private key file; takes precedence over ``password``.

    Raises:
        Exception: Whatever paramiko raises while connecting or executing.
    """
    import shlex  # local import: only this function needs it

    # Escape for the AppleScript string literal first, then quote the whole
    # script for the remote shell, so the message cannot break out of either.
    escaped = message.replace('\\', '\\\\').replace('"', '\\"')
    script = f'display notification "{escaped}" with title "Notification Title"'
    command = f"osascript -e {shlex.quote(script)}"

    ssh = paramiko.SSHClient()
    # NOTE(security): unknown host keys are accepted automatically.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    connect_kwargs = {'hostname': host, 'username': username}
    if key_filename:
        connect_kwargs['key_filename'] = key_filename
    else:
        connect_kwargs['password'] = password

    try:
        await asyncio.to_thread(ssh.connect, **connect_kwargs)
        _, stdout, _ = await asyncio.to_thread(ssh.exec_command, command)
        # exec_command returns as soon as the command is started; wait for it
        # to finish so closing the connection cannot cut it short.
        await asyncio.to_thread(stdout.channel.recv_exit_status)
    finally:
        ssh.close()
async def notify_shellfish(alert: str):
    """Send ``alert`` as a Secure ShellFish push notification.

    The payload is encrypted with AES-256-CBC through the ``openssl`` command
    line tool and posted to the ShellFish push endpoint.

    Raises:
        Exception: When ``openssl`` exits non-zero or the endpoint answers
            with a status other than 200.
    """
    # NOTE(security): these credentials are hard-coded in source; move them
    # to configuration and rotate them.
    key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b"
    user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm"
    iv = "ab5bbeb426015da7eedcee8bee3dffb7"

    plain = "Secure ShellFish Notify 2.0\n" + base64.b64encode(alert.encode()).decode() + "\n"
    openssl_command = [
        "openssl", "enc", "-aes-256-cbc", "-base64", "-K", key, "-iv", iv
    ]
    completed = await asyncio.to_thread(
        subprocess.run, openssl_command, input=plain.encode(), capture_output=True
    )
    if completed.returncode != 0:
        raise Exception(f"OpenSSL encryption failed: {completed.stderr.decode()}")
    base64_encoded = completed.stdout.decode().strip()

    url = f"https://secureshellfish.app/push/?user={user}&mutable"
    headers = {"Content-Type": "text/plain"}
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, data=base64_encoded) as response:
            if response.status != 200:
                # aiohttp responses expose ``status``; ``status_code`` does
                # not exist and turned this error path into AttributeError.
                raise Exception(f"Failed to send notification: {response.status}")
## SHELLFISH ##
def shellfish_health_check():
    """Probe the known /health endpoints and publish the result to the widget.

    Returns:
        dict: ``output`` is the widget command's stdout and ``graph`` a text
        bar with one '|' per reachable endpoint and one '.' per failing one.

    Raises:
        HTTPException: Propagated from ``shellfish_run_widget_command``.
    """
    addresses = [
        "https://api.sij.ai/health",
        "http://100.64.64.20:4444/health",
        "http://100.64.64.30:4444/health",
        "http://100.64.64.11:4444/health",
        "http://100.64.64.15:4444/health"
    ]
    results = []
    up_count = 0
    for address in addresses:
        try:
            # Without a timeout a single unreachable host can hang the whole
            # check; a timeout is reported as "down" by the handler below.
            response = requests.get(address, timeout=5)
            if response.status_code == 200:
                results.append(f"{address} is up")
                up_count += 1
            else:
                results.append(f"{address} returned status code {response.status_code}")
        except requests.exceptions.RequestException:
            results.append(f"{address} is down")

    # Generate a simple text-based graph
    graph = '|' * up_count + '.' * (len(addresses) - up_count)
    text_update = "\n".join(results)
    widget_command = ["widget", "--text", text_update, "--text", f"Graph: {graph}", "--icon", "network"]
    output = shellfish_run_widget_command(widget_command)
    return {"output": output, "graph": graph}
def shellfish_update_widget(update: WidgetUpdate):
    """Translate ``update`` into a ``widget`` command line and run it.

    Every non-empty field contributes a flag/value pair, in a fixed order;
    ``graph`` is emitted as a second ``--text`` value.

    Returns:
        dict: ``{"output": <stdout of the widget command>}``.
    """
    flag_values = (
        ("--text", update.text),
        ("--progress", update.progress),
        ("--icon", update.icon),
        ("--color", update.color),
        ("--url", update.url),
        ("--shortcut", update.shortcut),
        ("--text", update.graph),
    )
    widget_command = ["widget"]
    for flag, value in flag_values:
        if value:
            widget_command.extend([flag, value])
    return {"output": shellfish_run_widget_command(widget_command)}
def shellfish_run_widget_command(args: List[str]):
    """Run a command given as an argument list and return its stdout.

    The list is executed directly, not through a shell: with ``shell=True``
    and a list, POSIX passes only the first element to the shell as the
    command, so every flag and value was silently dropped.

    Args:
        args: Program name followed by its arguments.

    Returns:
        The command's standard output as text.

    Raises:
        HTTPException: 500 when the command cannot be started or exits with
            a non-zero status (detail carries the error text / stderr).
    """
    try:
        result = subprocess.run(args, capture_output=True, text=True)
    except OSError as exc:
        # e.g. the executable is not installed on this host
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    if result.returncode != 0:
        raise HTTPException(status_code=500, detail=result.stderr)
    return result.stdout
### COURTLISTENER FUNCTIONS ###
async def cl_docket(data, client_ip, background_tasks: BackgroundTasks):
    """Log a docket webhook payload and queue each result for processing.

    Args:
        data: Parsed webhook body; must contain ``payload.results``.
        client_ip: Sender address, used in the log file name.
        background_tasks: Task queue of the current request.

    Returns:
        A 200 JSON response with ``{"message": "Received"}``.
    """
    payload = data['payload']
    results = payload['results']

    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    dump_path = LOGS_DIR / f"{stamp}-{client_ip}_docket.json"
    with open(dump_path, 'w') as dump:
        json.dump(payload, dump, indent=2)

    for item in results:
        background_tasks.add_task(cl_docket_process, item)
    return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
async def cl_docket_process(result):
    """Process one docket result with a short-lived httpx client."""
    async with httpx.AsyncClient() as client:
        await cl_docket_process_result(result, client)
async def cl_docket_process_result(result, session):
    """Download every RECAP document attached to one docket webhook result.

    The docket is looked up in the case table for its code and short name,
    CourtListener is queried for the docket number and case name (used for
    logging only), and each document is saved as
    ``<COURTLISTENER_DOCKETS_DIR>/<shortname>/Docket/<code>_<number>_<date>_<description>.pdf``.

    Args:
        result: One entry of the webhook's ``results`` list.
        session: Client object supplied by the caller; passed on to
            ``cl_download_file`` unchanged.
    """
    docket = str(result.get('docket'))
    case_code, case_shortname = cl_case_details(docket)
    date_filed = result.get('date_filed', 'No Date Filed')

    try:
        date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d')
    except (ValueError, TypeError):
        # TypeError covers an explicit null date in the payload.
        date_filed_formatted = 'NoDateFiled'

    # Fetching court docket information from the API
    url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
    headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
    court_docket = 'NoCourtDocket'
    case_name = 'NoCaseName'

    # Use a separately named session so the caller's ``session`` argument is
    # not replaced by one that is already closed when the downloads start.
    async with aiohttp.ClientSession() as api_session:
        async with api_session.get(url, headers=headers) as response:
            if response.status == 200:
                DEBUG(f"Fetching CourtListener docket information for {docket}...")
                data = await response.json()
                api_results = data.get('results') or []
                if api_results:
                    court_docket = api_results[0]['docket_number_core']
                    court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}"  # Formatting the docket number
                    case_name = api_results[0]['case_name']
                    DEBUG(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
                else:
                    DEBUG(f"CourtListener returned no results for docket {docket}.")
            else:
                DEBUG("Failed to fetch data from CourtListener API.")

    for document in result.get('recap_documents', []):
        filepath_ia = document.get('filepath_ia')
        filepath_local = document.get('filepath_local')

        if filepath_ia:
            file_url = filepath_ia
            DEBUG(f"Found IA file at {file_url}.")
        elif filepath_local:
            file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
            DEBUG(f"Found local file at {file_url}.")
        else:
            DEBUG(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
            continue

        document_number = document.get('document_number', 'NoDocumentNumber')
        description = document.get('description')
        if description is None:
            description = 'NoDescription'
        description = description.replace(" ", "_").replace("/", "_")
        description = description[:50]  # Truncate description
        # case_shortname = case_name # TEMPORARY OVERRIDE
        file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
        target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name
        target_path.parent.mkdir(parents=True, exist_ok=True)

        await cl_download_file(file_url, target_path, session)
        DEBUG(f"Downloaded {file_name} to {target_path}")
def cl_case_details(docket):
    """Look up the case code and short name for ``docket``.

    Returns:
        tuple: ``(code, shortname)`` from ``CASETABLE``; ``("000",
        "UNKNOWN")`` when the docket has no entry.
    """
    fallback = {"code": "000", "shortname": "UNKNOWN"}
    entry = CASETABLE.get(str(docket), fallback)
    return entry.get("code"), entry.get("shortname")
async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
    """Download a PDF from ``url`` to ``path`` after validating it.

    The file is written only when the server answers successfully, declares
    a PDF content type and the payload can be opened by ``PdfReader``. Every
    failure is logged and swallowed; nothing is raised to the caller.

    Args:
        url: Address of the document.
        path: Destination file; parent directories are created as needed.
        session: Accepted for compatibility with existing callers but not
            used. The download always runs on its own aiohttp session,
            because callers in this module pass other client types.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
    }

    async with aiohttp.ClientSession() as http:
        DEBUG(f"Attempting to download {url} to {path}.")
        try:
            async with http.get(url, headers=headers, allow_redirects=True) as response:
                if response.status == 403:
                    ERR(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
                    return
                response.raise_for_status()

                # Check if the response content type is a PDF. Compare only
                # the media type so parameters ("; charset=...") or different
                # casing do not reject a genuine PDF.
                content_type = response.headers.get('Content-Type')
                media_type = (content_type or '').split(';', 1)[0].strip().lower()
                if media_type != 'application/pdf':
                    ERR(f"Invalid content type: {content_type}. Skipping download.")
                    return

                content = await response.read()

                # Validate the downloaded PDF content
                try:
                    PdfReader(io.BytesIO(content))
                except Exception as e:
                    ERR(f"Invalid PDF content: {str(e)}. Skipping download.")
                    return

                # If the PDF is valid, write the content to the file on disk
                path.parent.mkdir(parents=True, exist_ok=True)
                with path.open('wb') as file:
                    file.write(content)
        except Exception as e:
            ERR(f"Error downloading file: {str(e)}")
async def cl_search_process_result(result):
    """Download the document referenced by one CourtListener search result.

    The file is stored under
    ``<COURTLISTENER_SEARCH_DIR>/<court_id>/<case name>/<file name>``, where
    the case folder prefers ``caseNameShort`` over ``caseName``.

    Args:
        result: One entry of the search webhook's ``results`` list.
    """
    download_url = result.get('download_url')
    court_id = result.get('court_id')
    case_name_short = result.get('caseNameShort')
    case_name = result.get('caseName')
    DEBUG(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")

    if not download_url:
        # Nothing to fetch; previously this crashed on ``None.split``.
        ERR(f"No download url in search result for case {case_name}; skipping.")
        return

    # Fall back to placeholder folders when identifiers are missing, and keep
    # slashes in case names from creating unintended sub-directories.
    court_folder = str(court_id or "unknown_court").replace("/", "_")
    case_folder = str(case_name_short or case_name or "unknown_case").replace("/", "_")

    file_name = download_url.split('/')[-1]
    target_path = Path(COURTLISTENER_SEARCH_DIR) / court_folder / case_folder / file_name
    target_path.parent.mkdir(parents=True, exist_ok=True)

    # cl_download_file opens its own HTTP session, so none is created here.
    await cl_download_file(download_url, target_path)
    DEBUG(f"Downloaded {file_name} to {target_path}")

973
sijapi/routers/ig.py Normal file
View file

@ -0,0 +1,973 @@
'''
IN DEVELOPMENT: Instagram AI bot module.
'''
from fastapi import APIRouter, UploadFile
import os
import io
import copy
import re
import jwt
import json
from tqdm import tqdm
import pyotp
import time
import pytz
import requests
import tempfile
import random
import subprocess
import urllib.request
import uuid
from fastapi import APIRouter
from time import sleep
from datetime import timedelta, datetime as date
from PIL import Image
from pydantic import BaseModel
from typing import Dict, List, Optional
import instagrapi
from instagrapi import Client as igClient
from instagrapi.types import UserShort
from urllib.parse import urlparse
from instagrapi.exceptions import LoginRequired as ClientLoginRequiredError
import json
from ollama import Client as oLlama
from sd import sd
from dotenv import load_dotenv
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, COMFYUI_DIR
import io
from io import BytesIO
import base64
# Router object for the Instagram module.
ig = APIRouter()
class IG_Request(BaseModel):
    """Options accepted by the Instagram bot; every field is optional."""
    file: Optional[UploadFile] = None # upload a particular file to Instagram
    profile: Optional[str] = None # specify the profile account to use (uses the shortnames defined per folders and the config file)
    local_only: Optional[bool] = False # overrides all other settings to ensure images are generated locally and stay local
    openai: Optional[str] = None # OpenAI API key; if included, will rely on it for DALL-E, GPT-4, and GPT-4-Vision unless otherwise overridden
    llm: Optional[str] = "llama3" # if a valid OpenAI model name is provided, it will be used; otherwise it will attempt to match to an Ollama model (if one exists)
    i2t: Optional[str] = "llava" # set to GPT-4-Vision to use the OpenAI image-2-text model, otherwise this will attempt to match to a vision-capable Ollama model
    t2i: Optional[str] = None # set to DALL-E to use the OpenAI model, or use it to override the StableDiffusion workflow that's otherwise selected. Leave blank to use defaults per the config file
    # NOTE(review): the default below is the boolean True although the field is declared as a string -- confirm which is intended.
    ig_post: Optional[str] = True # if given a value, will use this as the category of post; if given no value, will use all categories unless ig_comment_only is enabled
    ig_comment: Optional[str] = None # if given a value, will use this as the category of comment; if given no value, will use all categories unless ig_post_only is enabled
    ig_comment_user: Optional[str] = None # target a particular user for comments
    ig_comment_url: Optional[str] = None # target a particular ig url for comments
    ghost_post: Optional[bool] = True # enable posting to Ghost
    sleep_short: Optional[int] = 5 # average duration of short intervals (a few seconds is adequate; this is to simulate doomscrolling latency)
    sleep_long: Optional[int] = 180 # average duration of long intervals (this should be about a minute at least; it simulates the time it takes to write a comment or prepare a post)
# --- Settings read from the environment -------------------------------------
IG_PROFILE = os.getenv("IG_PROFILE")
IG_SHORT_SLEEP = int(os.getenv("IG_SHORT_SLEEP", 5))
IG_LONG_SLEEP = int(os.getenv("IG_LONG_SLEEP", 180))
IG_POST_GHOST = os.getenv("IG_POST_GHOST")
IG_VISION_LLM = os.getenv("IG_VISION_LLM")
IG_PROMPT_LLM = os.getenv("IG_PROMPT_LLM")
IG_IMG_GEN = os.getenv("IG_IMG_GEN", "ComfyUI")
# Comma-separated list of output targets, split into a list.
IG_OUTPUT_PLATFORMS = os.getenv("IG_OUTPUT_PLATFORMS", "ig,ghost,obsidian").split(',')
# --- Directory layout --------------------------------------------------------
SD_WORKFLOWS_DIR = os.path.join(COMFYUI_DIR, 'workflows')
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output'
# NOTE(review): BASE_DIR and PROFILE are not defined or imported in the part
# of the module visible here; PROFILE presumably means IG_PROFILE -- confirm.
IG_PROFILES_DIR = os.path.join(BASE_DIR, 'profiles')
IG_PROFILE_DIR = os.path.join(IG_PROFILES_DIR, PROFILE)
IG_IMAGES_DIR = os.path.join(IG_PROFILE_DIR, 'images')
IG_PROFILE_CONFIG_PATH = os.path.join(IG_PROFILE_DIR, f'config.json')
IG_VIEWED_IMAGES_DIR = os.path.join(IG_PROFILE_DIR, 'downloads')
# Per-profile configuration, loaded once at import time.
with open(IG_PROFILE_CONFIG_PATH, 'r') as config_file:
    PROFILE_CONFIG = json.load(config_file)
# Make sure the profile's image directory exists.
if not os.path.exists(IG_IMAGES_DIR):
    os.makedirs(IG_IMAGES_DIR )
OPENAI_API_KEY=PROFILE_CONFIG.get("openai_key")
###################
### VALIDATION ###
##################
# NOTE(review): ``args`` is not defined in the part of the module visible
# here (it looks like a leftover from a command-line version) -- confirm
# where it is supposed to come from. The checks below only print a message.
if args.profile and args.posttype and not args.custompost and not args.posttype in PROFILE_CONFIG["posts"]:
    print ("ERROR: NO SUCH POST TYPE IS AVAILABLE FOR THIS PROFILE.")
if args.profile and args.commenttype and not args.commenttype in PROFILE_CONFIG["comments"]:
    print ("ERROR: NO SUCH COMMENT TYPE IS AVAILABLE FOR THIS PROFILE.")
####################
### CLIENT SETUP ###
####################
# Instagram client shared by every helper in this module.
cl = igClient(request_timeout=1)
# NOTE(review): ``OpenAI`` is not imported in the visible import block -- confirm.
IMG_GEN = OpenAI(api_key=OPENAI_API_KEY)
IMG_MODEL = "dall-e-3"
COMFYUI_URL = "http://localhost:8188"
# Random identifier generated once per import.
CLIENT_ID = str(uuid.uuid4())
###############################
### INSTAGRAM & GHOST SETUP ###
###############################
# Credentials and endpoints come from the per-profile config file.
IG_USERNAME = PROFILE_CONFIG.get("ig_name")
IG_PASSWORD = PROFILE_CONFIG.get("ig_pass")
IG_SECRET_KEY = PROFILE_CONFIG.get("ig_2fa_secret")
IG_SESSION_PATH = os.path.join(IG_PROFILE_DIR, f'credentials.json')
GHOST_API_URL=PROFILE_CONFIG.get("ghost_admin_url")
GHOST_API_KEY=PROFILE_CONFIG.get("ghost_admin_api_key")
GHOST_CONTENT_KEY=PROFILE_CONFIG.get("ghost_content_key")
########################
### LLM PROMPT SETUP ###
########################
IMG_PROMPT_SYS = PROFILE_CONFIG.get("img_prompt_sys")
IMG_DESCRIPTION_SYS = PROFILE_CONFIG.get("img_description_sys")
COMMENT_PROMPT_SYS = PROFILE_CONFIG.get("img_comment_sys")
# Hashtags that get_medias_from_all_hashtags() picks from at random.
HASHTAGS = PROFILE_CONFIG.get("preferred_hashtags", [])
IMAGE_URL = args.image_url
rollover_time = 1702605780
# Text file with one media pk per line; see add_media_to_completed_lists().
COMPLETED_MEDIA_LOG = os.path.join(IG_PROFILE_DIR, f'completed-media.txt')
# TOTP generator built from the profile's 2FA secret.
TOTP = pyotp.TOTP(IG_SECRET_KEY)
# Sleep durations used between Instagram calls.
SHORT = args.shortsleep
LONG = args.longsleep
def follow_by_username(username) -> bool:
    """
    Resolve ``username`` to a user id, pause briefly, then follow that user.
    Returns whatever the client's follow call reports.
    """
    target_id = cl.user_id_from_username(username)
    sleep(SHORT)
    followed = cl.user_follow(target_id)
    return followed
def unfollow_by_username(username) -> bool:
    """
    Resolve ``username`` to a user id, pause briefly, then unfollow that user.
    Returns whatever the client's unfollow call reports.
    """
    target_id = cl.user_id_from_username(username)
    sleep(SHORT)
    unfollowed = cl.user_unfollow(target_id)
    return unfollowed
def get_poster_of_post(shortcode):
    """
    Return the username of the account that posted the media with ``shortcode``.
    """
    return cl.media_info_by_shortcode(shortcode).user.username
def get_followers(amount: int = 0) -> Dict[int, UserShort]:
    """
    Get the bot's followers, return Dict of user_id and User object.
    ``amount`` is passed straight through to the client.
    """
    own_id = cl.user_id
    followers = cl.user_followers(own_id, amount=amount)
    return followers
def get_followers_usernames(amount: int = 0) -> List[str]:
    """
    Get the usernames of the bot's followers, return List of usernames.
    Pauses briefly after the fetch.
    """
    followers = cl.user_followers(cl.user_id, amount=amount)
    sleep(SHORT)
    usernames = []
    for follower in followers.values():
        usernames.append(follower.username)
    return usernames
def get_following(amount: int = 0) -> Dict[int, UserShort]:
    """
    Get the users the bot follows, return Dict of user_id and User object.
    Pauses briefly before the fetch.
    """
    sleep(SHORT)
    own_id = cl.user_id
    following = cl.user_following(own_id, amount=amount)
    return following
def get_user_media(username, amount=30):
    """
    Fetch up to ``amount`` recent medias of ``username`` and return only
    those whose ``media_type`` is 1. Pauses briefly per fetched media.
    """
    DEBUG(f"Fetching recent media for {username}...")
    owner_id = cl.user_id_from_username(username)
    fetched = cl.user_medias(owner_id, amount)
    kept = []
    for item in fetched:
        sleep(SHORT)
        if media_type_is_photo := (item.media_type == 1):
            kept.append(item)
    return kept
def get_user_image_urls(username, amount=30) -> List[str]:
    """
    Fetch up to ``amount`` recent medias of ``username`` and return the
    thumbnail URLs of those with ``media_type`` 1 that have one.
    Pauses briefly per fetched media.
    """
    DEBUG(f"Fetching recent media URLs for {username}...")
    owner_id = cl.user_id_from_username(username)
    fetched = cl.user_medias(owner_id, amount)
    thumbnails = []
    for item in fetched:
        sleep(SHORT)
        if item.media_type != 1:
            continue
        if item.thumbnail_url:
            thumbnails.append(item.thumbnail_url)
    return thumbnails
def is_valid_url(url):
    """
    Return True when ``url`` parses to something with both a scheme and a
    network location, False otherwise (including unparsable input).
    """
    try:
        parts = urlparse(url)
    except Exception:
        return False
    return bool(parts.scheme) and bool(parts.netloc)
def get_random_follower():
    """
    Return the username of a randomly chosen follower of the bot.

    Uses this module's ``get_followers_usernames`` helper; the Instagram
    client object itself has no method of that name.

    Raises:
        IndexError: When the bot has no followers.
    """
    followers = get_followers_usernames()
    sleep(SHORT)
    return random.choice(followers)
def get_medias_by_hashtag(hashtag: str, days_ago_max:int = 14, ht_type:str = None, amount:int = args.count):
    """
    Collect filtered medias for one hashtag.

    Medias are fetched repeatedly (``amount * 10`` per request) until at
    least ``amount`` of them survive ``filter_medias`` or a request brings
    nothing new. Each media is counted once: re-fetching the same page used
    to fill the quota with duplicates.

    Args:
        hashtag: Hashtag name without '#'.
        days_ago_max: Maximum media age in days, passed to ``filter_medias``.
        ht_type: "top" or "recent"; defaults to ``args.commentmode``.
        amount: Number of filtered medias wanted.

    Returns:
        The filtered medias; possibly fewer than ``amount`` when the hashtag
        does not yield enough.

    Raises:
        ValueError: When ``ht_type`` is neither "top" nor "recent" (which
            previously looped forever without fetching anything).
    """
    if not ht_type:
        ht_type = args.commentmode
    if ht_type == "top":
        fetch = cl.hashtag_medias_top
    elif ht_type == "recent":
        fetch = cl.hashtag_medias_recent
    else:
        raise ValueError(f"Unsupported hashtag media type: {ht_type!r}")

    DEBUG(f"Fetching {ht_type} media for hashtag: {hashtag}")
    unique_medias = {}  # pk -> media, insertion-ordered
    filtered_medias = []
    while True:
        sleep(SHORT)
        known_before = len(unique_medias)
        for media in fetch(name=hashtag, amount=amount*10):
            unique_medias.setdefault(media.pk, media)
        filtered_medias = filter_medias(list(unique_medias.values()), days_ago_max=days_ago_max)
        DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(filtered_medias)}")
        if len(filtered_medias) >= amount:
            DEBUG(f"Desired amount of {amount} filtered media reached.")
            break
        if len(unique_medias) == known_before:
            # Nothing new came back, so another identical request cannot help.
            DEBUG(f"No new media for '#{hashtag}'; stopping with {len(filtered_medias)} of {amount}.")
            break
    return filtered_medias
def get_medias_from_all_hashtags(days_ago_max=14, ht_type:str = None, amount:int = args.count):
    """
    Collect filtered medias across randomly chosen preferred hashtags.

    A hashtag is drawn at random from ``HASHTAGS`` for every round, 50 medias
    are fetched for it and filtered, and new (not yet collected) medias are
    added until ``amount`` is reached. The search gives up after a run of
    rounds that add nothing new.

    Args:
        days_ago_max: Maximum media age in days, passed to ``filter_medias``.
        ht_type: "top" or "recent"; defaults to ``args.commentmode``.
        amount: Number of filtered medias wanted.

    Returns:
        At most ``amount`` filtered medias, without duplicates.

    Raises:
        ValueError: When ``ht_type`` is neither "top" nor "recent" (which
            previously looped forever without fetching anything).
    """
    if not ht_type:
        ht_type = args.commentmode
    if ht_type not in ("top", "recent"):
        raise ValueError(f"Unsupported hashtag media type: {ht_type!r}")
    if not HASHTAGS:
        # random.choice() would raise on an empty list.
        DEBUG("No preferred hashtags configured; nothing to fetch.")
        return []

    DEBUG(f"Fetching {ht_type} media.")
    max_idle_rounds = max(3, 2 * len(HASHTAGS))
    idle_rounds = 0
    seen_pks = set()
    filtered_medias = []

    while len(filtered_medias) < amount and idle_rounds < max_idle_rounds:
        hashtag = random.choice(HASHTAGS)
        DEBUG(f"Using hashtag: {hashtag}")
        sleep(SHORT)
        if ht_type == "top":
            fetched_medias = cl.hashtag_medias_top(name=hashtag, amount=50)  # Fetch a large batch to filter from
        else:
            fetched_medias = cl.hashtag_medias_recent(name=hashtag, amount=50)  # Same for recent

        # Keep only medias not collected in an earlier round.
        current_filtered_medias = []
        for media in filter_medias(fetched_medias, days_ago_max=days_ago_max):
            if media.pk not in seen_pks:
                seen_pks.add(media.pk)
                current_filtered_medias.append(media)
        filtered_medias.extend(current_filtered_medias)
        idle_rounds = 0 if current_filtered_medias else idle_rounds + 1
        DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(current_filtered_medias)}")

        # Trim the list if we've collected more than needed
        if len(filtered_medias) > amount:
            filtered_medias = filtered_medias[:amount]
            DEBUG(f"Desired amount of {amount} filtered media reached.")
            break
        else:
            DEBUG(f"Total filtered media count so far: {len(filtered_medias)}")
    return filtered_medias
def filter_medias(
    medias: List,
    like_count_min=None,
    like_count_max=None,
    comment_count_min=None,
    comment_count_max=None,
    days_ago_max=None,
):
    """
    Return the medias that satisfy every given bound and are not yet logged
    as completed.

    Args:
        medias: Medias to filter.
        like_count_min / like_count_max: Inclusive bounds on ``like_count``;
            None disables the bound.
        comment_count_min / comment_count_max: Inclusive bounds on
            ``comment_count``; None disables the bound.
        days_ago_max: Keep only medias taken less than this many days ago
            (medias without ``taken_at`` are dropped); None disables the
            check. 0 is a valid bound.
    """
    # Adjust to use your preferred timezone, for example, UTC.
    # Test against None, not truthiness: with days_ago_max=0 the cut-off used
    # to stay None and the comparison below raised TypeError.
    days_back = None
    if days_ago_max is not None:
        days_back = date.now(pytz.utc) - timedelta(days=days_ago_max)

    def within_bounds(media) -> bool:
        if like_count_min is not None and media.like_count < like_count_min:
            return False
        if like_count_max is not None and media.like_count > like_count_max:
            return False
        if comment_count_min is not None and media.comment_count < comment_count_min:
            return False
        if comment_count_max is not None and media.comment_count > comment_count_max:
            return False
        if days_back is not None and not (media.taken_at and media.taken_at > days_back):
            return False
        return True

    return [
        media for media in medias
        if within_bounds(media) and not check_media_in_completed_lists(media)
    ]
def add_media_to_completed_lists(media):
    """
    Mark a media as handled by appending its pk, one per line, to COMPLETED_MEDIA_LOG.
    """
    with open(COMPLETED_MEDIA_LOG, 'a') as log_file:
        log_file.write(str(media.pk) + "\n")
def check_media_in_completed_lists(media):
    """
    Check if a media is in the completed lists.

    Looks for the media's pk among the lines of COMPLETED_MEDIA_LOG.

    Returns:
        True when the pk is recorded in the log, False otherwise. Also False
        when the log file does not exist yet.
    """
    try:
        with open(COMPLETED_MEDIA_LOG, 'r') as file:
            completed_media = file.read().splitlines()
    except FileNotFoundError:
        # The log is only created by the first append; before that no media
        # has been recorded, so nothing can be "completed".
        return False
    return str(media.pk) in completed_media
def download_and_resize_image(url: str, download_path: str = None, max_dimension: int = 1200) -> str:
    """
    Download an image, shrink it if needed, and save it to disk.

    Args:
        url: Image URL; non-string values are converted with str().
        download_path: Target file. When missing, or when its parent directory
            does not exist, a temporary file name is generated instead, using
            the URL's file extension (".jpg" if it has none).
        max_dimension: Largest allowed width/height in pixels; bigger images
            are downscaled in place, preserving aspect ratio.

    Returns:
        The path the image was written to, or None when the URL lacks a
        scheme/host or when downloading, decoding or saving fails.
    """
    if not isinstance(url, str):
        url = str(url)
    parsed_url = urlparse(url)

    if not download_path or not os.path.isdir(os.path.dirname(download_path)):
        _, temp_file_extension = os.path.splitext(parsed_url.path)
        if not temp_file_extension:
            temp_file_extension = ".jpg"  # Default extension if none is found
        # NOTE(review): tempfile.mktemp only reserves a name and is race-prone;
        # kept because callers rely on no file existing when the download fails.
        download_path = tempfile.mktemp(suffix=temp_file_extension, prefix="download_")

    if not (url and parsed_url.scheme and parsed_url.netloc):
        return None

    try:
        os.makedirs(os.path.dirname(download_path), exist_ok=True)
        with requests.get(url) as response:
            response.raise_for_status()  # Raises an HTTPError if the response was an error
            image = Image.open(BytesIO(response.content))
            # Resize the image, preserving aspect ratio
            if max(image.size) > max_dimension:
                image.thumbnail((max_dimension, max_dimension))
            # Keep the source format when known. The fallback must be a
            # registered Pillow format name: "JPEG", not "jpg".
            image_format = image.format if image.format else "JPEG"
            image.save(download_path, image_format)
        return download_path
    except Exception as e:
        # Best effort: callers treat None as "no image available".
        DEBUG(f"Error downloading or resizing image: {e}")
        return None
def comment_on_user_media(user: str, comment_type: str = "default", amount=5):
    """
    Post generated comments on a user's media.

    For each media returned by get_user_media() that is not yet in the
    completed log: download its thumbnail, ask the vision model (llava or
    gpt4v, depending on args) for a comment, post it through `cl`, and record
    the media as completed.

    Args:
        user: User whose media is fetched.
        comment_type: Key into PROFILE_CONFIG['comments'] selecting the prompt.
        amount: Number of medias requested from get_user_media().
    """
    comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr']

    for media in get_user_media(user, amount):
        if check_media_in_completed_lists(media):
            DEBUG(f"Media already interacted with: {media.pk}")
            continue

        sleep(SHORT)
        if not (media.thumbnail_url and is_valid_url(media.thumbnail_url)):
            DEBUG(f"URL for {media.pk} disappeared it seems...")
            continue

        media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
        if media_path is None:
            DEBUG(f"We received a nonetype! {media_path}")
            continue

        encoded_media = encode_image_to_base64(media_path)
        if args.llava or not args.openai:
            comment_text = llava(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)
        else:
            comment_text = gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)

        if comment_text:
            cl.media_comment(media.pk, comment_text)
            DEBUG(f"Commented on media: {media.pk}")
        else:
            DEBUG(f"Failed to generate comment for media: {media.pk}")

        # Recorded even when no comment could be generated, so it is not retried.
        add_media_to_completed_lists(media)
        sleep(SHORT)
def comment_on_hashtagged_media(comment_type: str = args.commenttype, amount=3, hashtag: str = None):
    """
    Post generated comments on media found under a hashtag.

    Args:
        comment_type: Key into PROFILE_CONFIG['comments'] selecting prompt,
            hashtags and sentiment.
        amount: Number of medias requested from get_medias_by_hashtag().
        hashtag: Hashtag to use; a random configured one is picked when falsy.
    """
    if not hashtag:
        hashtag = random.choice(PROFILE_CONFIG['comments'][comment_type]['hashtags'])

    for media in get_medias_by_hashtag(hashtag=hashtag, days_ago_max=7, amount=amount):
        if check_media_in_completed_lists(media):
            DEBUG(f"Media already interacted with: {media.pk}")
            continue

        media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
        comment_text = None
        if media_path and os.path.exists(media_path):
            encoded_media = encode_image_to_base64(media_path)
            comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr'] + " For reference, here is the description that was posted with this image: " + media.caption_text
            if args.llava or not args.openai:
                comment_text = llava(encoded_media, comment_prompt_usr)
            else:
                comment_text = gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)

        # Liking is switched off: `False is True` can never hold, so this
        # branch is dead code kept for re-enabling later.
        if (PROFILE_CONFIG['comments'][comment_type]['sentiment'] == "positive") and False is True:
            try:
                like_result = cl.media_like(media)
                if like_result:
                    DEBUG(f"Liked media: https://instagram.com/p/{media.pk}/")
            except instagrapi.exceptions.FeedbackRequired as e:
                DEBUG(f"Cannot like media {media.pk}: {str(e)}")

        if not comment_text:
            DEBUG(f"Failed to generate comment for media: https://instagram.com/p/{media.pk}")
        else:
            try:
                cl.media_comment(media.pk, comment_text)
                DEBUG(f"Commented on media: https://instagram.com/p/{media.pk}/")
            except instagrapi.exceptions.FeedbackRequired as e:
                DEBUG(f"Cannot comment on media {media.pk}: {str(e)}")

        # Recorded regardless of outcome so the media is not revisited.
        add_media_to_completed_lists(media)
        sleep(SHORT)
def comment_on_specific_media(media_url, comment_type: str = "default"):
    """
    Comment on a specific media given its URL.

    Resolves the media through `cl`, downloads its thumbnail, asks the vision
    model (llava or gpt4v, depending on args) for a comment and posts it.

    Args:
        media_url: URL of the media to comment on.
        comment_type: Key into PROFILE_CONFIG['comments'] selecting the prompt.

    Returns:
        None. Failures to download, generate or post are logged, not raised.
    """
    media_id = cl.media_pk_from_url(media_url)
    sleep(SHORT)
    media = cl.media_info(media_id)
    sleep(SHORT)

    media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
    # download_and_resize_image returns None on failure; bail out instead of
    # handing None to the encoder (the hashtag variant has the same guard).
    if not media_path or not os.path.exists(media_path):
        DEBUG(f"Failed to download image for specific media: https://instagram.com/p/{media.pk}/")
        return

    encoded_media = encode_image_to_base64(media_path)
    # A missing caption must not break the string concatenation.
    caption_text = media.caption_text or ""
    comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr'] + " For reference, here is the description that was posted with this image: " + caption_text
    comment_text = llava(encoded_media, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)

    if comment_text:
        try:
            cl.media_comment(media.pk, comment_text)
            DEBUG(f"Commented on specific media: https://instagram.com/p/{media.pk}/")
        except instagrapi.exceptions.FeedbackRequired as e:
            DEBUG(f"Failed to comment on specific media: https://instagram.com/p/{media.pk}/ due to error: {str(e)}")
    else:
        DEBUG(f"Failed to generate comment for specific media: https://instagram.com/p/{media.pk}/")
def get_image(status_data, key):
    """
    Read the bytes of the first image a job produced under output node `key`.

    Args:
        status_data: Job status mapping containing an "outputs" entry.
        key: Output node id whose "images" list is inspected.

    Returns:
        The raw file content of COMFYUI_OUTPUT_DIR/<subfolder>/<filename>.

    Raises:
        Exception: when no image is listed, a key is missing, or the file
            does not exist.
    """
    try:
        images_info = status_data.get("outputs", {}).get(key, {}).get("images", [])
        if not images_info:
            raise Exception("No images found in the job output.")

        first_image = images_info[0]  # only the first image is considered
        filename = first_image.get("filename")
        file_path = os.path.join(COMFYUI_OUTPUT_DIR, first_image.get("subfolder", ""), filename)

        with open(file_path, 'rb') as image_file:
            return image_file.read()
    except KeyError as e:
        raise Exception(f"Failed to extract image information due to missing key: {e}")
    except FileNotFoundError:
        raise Exception(f"File {filename} not found at the expected path {file_path}")
def update_prompt(workflow: dict, post: dict, positive: str, found_key=None, path=None):
    """
    Fill the placeholders of a workflow in place and locate its API SaveImage node.

    Walks `workflow` recursively (dicts and lists) and rewrites entries:
      * value "API_PPrompt" -> post["API_PPrompt"] + positive
      * value "API_SPrompt" / "API_NPrompt" -> the matching `post` entry
      * keys "seed" / "noise_seed" -> a fresh random 13-digit integer
      * width-like / height-like keys holding the sentinel 1023 or 1025 ->
        post["width"] / post["height"] (default 1024)

    Args:
        workflow: Workflow (sub)structure; modified in place.
        post: Post configuration supplying prompt parts and dimensions.
        positive: Text appended to the positive prompt.
        found_key: Internal one-element accumulator shared across the
            recursion; leave unset when calling from outside.
        path: Internal list of keys leading to `workflow`, for error logs.

    Returns:
        The key of the dict node with class_type "SaveImage" and
        filename_prefix "API_", or None if this call found none.

    Raises:
        Exception: any error is logged with its path and re-raised.
    """
    # A fresh accumulator per top-level call: a shared default list would
    # leak the key found for one workflow into later, unrelated calls.
    if found_key is None:
        found_key = [None]
    if path is None:
        path = []

    try:
        if isinstance(workflow, dict):
            for key, value in workflow.items():
                current_path = path + [key]

                if isinstance(value, dict):
                    if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
                        found_key[0] = key
                    update_prompt(value, post, positive, found_key, current_path)
                elif isinstance(value, list):
                    # Recursive call with updated path for each item in a list
                    for index, item in enumerate(value):
                        update_prompt(item, post, positive, found_key, current_path + [str(index)])

                if value == "API_PPrompt":
                    workflow[key] = post.get(value, "") + positive
                    DEBUG(f"Updated API_PPrompt to: {workflow[key]}")
                elif value == "API_SPrompt":
                    workflow[key] = post.get(value, "")
                    DEBUG(f"Updated API_SPrompt to: {workflow[key]}")
                elif value == "API_NPrompt":
                    workflow[key] = post.get(value, "")
                    DEBUG(f"Updated API_NPrompt to: {workflow[key]}")
                elif key == "seed" or key == "noise_seed":
                    workflow[key] = random.randint(1000000000000, 9999999999999)
                    DEBUG(f"Updated seed to: {workflow[key]}")
                elif (key == "width" or key == "max_width" or key == "scaled_width" or key == "side_length") and (value == 1023 or value == 1025):
                    workflow[key] = post.get("width", 1024)
                elif (key == "dimension" or key == "height" or key == "max_height" or key == "scaled_height") and (value == 1023 or value == 1025):
                    workflow[key] = post.get("height", 1024)
    except Exception as e:
        # str() keeps the log line from failing on non-string keys.
        DEBUG(f"Error in update_prompt at path {' -> '.join(map(str, path))}: {e}")
        raise

    return found_key[0]
def update_prompt_custom(workflow: dict, API_PPrompt: str, API_SPrompt: str, API_NPrompt: str, found_key=None, path=None):
    """
    Fill a workflow's placeholders in place from explicit prompt strings.

    Walks `workflow` recursively (dicts and lists) and rewrites entries:
      * values "API_PPrompt" / "API_SPrompt" / "API_NPrompt" -> the
        corresponding argument
      * keys "seed" / "noise_seed" -> a fresh random 13-digit integer
      * width-like / height-like keys holding the sentinel 1023 or 1025 -> 1024

    Args:
        workflow: Workflow (sub)structure; modified in place.
        API_PPrompt: Replacement for the positive-prompt placeholder.
        API_SPrompt: Replacement for the "API_SPrompt" placeholder.
        API_NPrompt: Replacement for the negative-prompt placeholder.
        found_key: Internal one-element accumulator shared across the
            recursion; leave unset when calling from outside.
        path: Internal list of keys leading to `workflow`, for error logs.

    Returns:
        The key of the dict node with class_type "SaveImage" and
        filename_prefix "API_", or None if this call found none.

    Raises:
        Exception: any error is logged with its path and re-raised.
    """
    # A fresh accumulator per top-level call: a shared default list would
    # leak the key found for one workflow into later, unrelated calls.
    if found_key is None:
        found_key = [None]
    if path is None:
        path = []

    try:
        if isinstance(workflow, dict):
            for key, value in workflow.items():
                current_path = path + [key]

                if isinstance(value, dict):
                    if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
                        found_key[0] = key
                    # Recurse into this function, not update_prompt, whose
                    # signature takes (workflow, post, positive, ...).
                    update_prompt_custom(value, API_PPrompt, API_SPrompt, API_NPrompt, found_key, current_path)
                elif isinstance(value, list):
                    # Recursive call with updated path for each item in a list
                    for index, item in enumerate(value):
                        update_prompt_custom(item, API_PPrompt, API_SPrompt, API_NPrompt, found_key, current_path + [str(index)])

                if value == "API_PPrompt":
                    workflow[key] = API_PPrompt
                    DEBUG(f"Updated API_PPrompt to: {workflow[key]}")
                elif value == "API_SPrompt":
                    workflow[key] = API_SPrompt
                    DEBUG(f"Updated API_SPrompt to: {workflow[key]}")
                elif value == "API_NPrompt":
                    workflow[key] = API_NPrompt
                    DEBUG(f"Updated API_NPrompt to: {workflow[key]}")
                elif key == "seed" or key == "noise_seed":
                    workflow[key] = random.randint(1000000000000, 9999999999999)
                    DEBUG(f"Updated seed to: {workflow[key]}")
                elif (key == "width" or key == "max_width" or key == "scaled_width") and (value == 1023 or value == 1025):
                    workflow[key] = 1024
                elif (key == "dimension" or key == "height" or key == "max_height" or key == "scaled_height") and (value == 1023 or value == 1025):
                    workflow[key] = 1024
    except Exception as e:
        # str() keeps the log line from failing on non-string keys.
        DEBUG(f"Error in update_prompt_custom at path {' -> '.join(map(str, path))}: {e}")
        raise

    return found_key[0]
##################################
### IMAGE GENERATION FUNCTIONS ###
##################################
def image_gen(prompt: str, model: str):
    """
    Generate one 1024x1024 standard-quality image through IMG_GEN and download it.

    Args:
        prompt: Text prompt for the image model.
        model: Model name passed to the image API.

    Returns:
        Whatever download_and_resize_image() returns for the generated image URL.
    """
    generation_options = {
        "model": model,
        "prompt": prompt,
        "size": "1024x1024",
        "quality": "standard",
        "n": 1,
    }
    result = IMG_GEN.images.generate(**generation_options)
    return download_and_resize_image(result.data[0].url)
def queue_prompt(prompt: dict):
    """
    Submit a workflow to the ComfyUI /prompt endpoint.

    Args:
        prompt: Workflow dictionary to queue.

    Returns:
        The 'prompt_id' field of the JSON reply (None if absent).

    Raises:
        Exception: when the server answers with a status other than 200.
    """
    payload = {"prompt": prompt, "client_id": CLIENT_ID}
    response = requests.post(f"{COMFYUI_URL}/prompt", json=payload)
    if response.status_code != 200:
        raise Exception(f"Failed to queue prompt. Status code: {response.status_code}, Response body: {response.text}")
    return response.json().get('prompt_id')
def poll_status(prompt_id):
    """
    Poll the job status until it's complete and return the status data.

    Requests COMFYUI_URL/history/<prompt_id> once per second until the entry
    for `prompt_id` reports status.completed.

    Args:
        prompt_id: Id of the queued prompt.

    Returns:
        The history entry (dict) of the completed job.

    Raises:
        Exception: when a status request does not return HTTP 200.
    """
    # NOTE: this function used to be defined twice, verbatim, back to back;
    # the copies were merged into this single definition.
    start_time = time.time()  # Record the start time
    while True:
        elapsed_time = int(time.time() - start_time)  # Calculate elapsed time in seconds
        status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}")
        # Use \r to return to the start of the line, and end='' to prevent newline
        DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='')
        if status_response.status_code != 200:
            raise Exception("Failed to get job status")
        status_data = status_response.json()
        job_data = status_data.get(prompt_id, {})
        if job_data.get("status", {}).get("completed", False):
            DEBUG()
            DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.")
            return job_data
        time.sleep(1)
################################
### PRIMARY ACTIVE FUNCTIONS ###
################################
def load_post(chosen_post: str = "default"):
    """
    Return the post configuration for `chosen_post`.

    If the name is not a key of PROFILE_CONFIG['posts'], a post type is
    selected with choose_post() instead.

    Args:
        chosen_post: Name of the desired post type.

    Returns:
        The configuration of the requested or the fallback post type.
    """
    if chosen_post not in PROFILE_CONFIG['posts']:
        DEBUG(f"Unable to load post for {chosen_post}. Choosing a default post.")
        chosen_post = choose_post(PROFILE_CONFIG['posts'])
        fallback = PROFILE_CONFIG['posts'][chosen_post]
        DEBUG(f"Defaulted to {chosen_post}")
        return fallback

    requested = PROFILE_CONFIG['posts'][chosen_post]
    DEBUG(f"Loaded post for {chosen_post}")
    return requested
def handle_image_workflow(chosen_post=None):
    """
    Run the full pipeline for one post: concept, image generation, follow-up.

    Steps: pick a post type (unless given), let an LLM write the image
    concept, load the workflow JSON, generate the image either through
    image_gen() or through the ComfyUI queue, convert it to JPEG, then hand
    it to image_aftergen().

    Args:
        chosen_post: Post type name; chosen via choose_post() when None.
    """
    if chosen_post is None:
        chosen_post = choose_post(PROFILE_CONFIG['posts'])
    post = load_post(chosen_post)

    workflow_name = args.workflow or random.choice(post['workflows'])
    DEBUG(f"Workflow name: {workflow_name}")
    DEBUG(f"Generating image concept for {chosen_post} and {workflow_name} now.")

    if args.local or not args.openai:
        image_concept = query_ollama(llmPrompt = post['llmPrompt'], max_tokens = 180)
    else:
        image_concept = query_gpt4(llmPrompt = post['llmPrompt'], max_tokens = 180)
    DEBUG(f"Image concept for {chosen_post}: {image_concept}")

    # Prefer the "_fast" variant of the workflow when requested.
    workflow_data = load_json(None, f"{workflow_name}_fast") if args.fast else None
    if workflow_data is None:
        workflow_data = load_json(None, workflow_name)

    if args.dalle and not args.local:
        jpg_file_path = image_gen(image_concept, "dall-e-3")
    else:
        saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
        DEBUG(f"Saved file key: {saved_file_key}")
        prompt_id = queue_prompt(workflow_data)
        DEBUG(f"Prompt ID: {prompt_id}")
        status_data = poll_status(prompt_id)
        image_data = get_image(status_data, saved_file_key)

        # Landscape posts are stored larger and at full quality.
        max_size, quality = (2880, 100) if chosen_post == "landscape" else (1440, 90)
        jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, max_size, quality)

    image_aftergen(jpg_file_path, chosen_post)
def handle_custom_image(custom_post: str):
    """
    Generate an image from a free-form request and pass it to image_aftergen().

    The post configuration comes from args.posttype when set; otherwise a
    built-in default is used together with the "selfie" workflow (unless
    args.workflow overrides it). An LLM turns `custom_post` into an image
    concept, which is rendered through image_gen() or the ComfyUI queue.

    Args:
        custom_post: The user's description of the desired image.
    """
    if args.posttype:
        post = load_post(args.posttype)
        workflow_name = args.workflow if args.workflow else random.choice(post['workflows'])
    else:
        workflow_name = args.workflow if args.workflow else "selfie"
        post = {
            "API_PPrompt": "",
            "API_SPrompt": "; (((masterpiece))); (beautiful lighting:1), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
            "API_NPrompt": "canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3",
            "Vision_Prompt": "Write an upbeat Instagram description with emojis to accompany this selfie!",
            "frequency": 2,
            "ghost_tags": [
                "aigenerated",
                "stablediffusion",
                "sdxl",
            ],
        }

    # Bound before branching: it used to be set only in the ComfyUI branch,
    # so the DALL-E path hit a NameError when calling image_aftergen().
    chosen_post = args.posttype if args.posttype else "custom"

    workflow_data = load_json(None, workflow_name)
    system_msg = "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words."
    image_concept = query_ollama(system_msg=system_msg, user_msg=custom_post, max_tokens = 180) if args.local or not args.openai else query_gpt4(system_msg=system_msg, user_msg=custom_post, max_tokens = 180)
    DEBUG(f"Image concept: {image_concept}")

    if args.dalle and not args.local:
        jpg_file_path = image_gen(image_concept, "dall-e-3")
    else:
        saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
        DEBUG(f"Saved file key: {saved_file_key}")
        prompt_id = queue_prompt(workflow_data)
        DEBUG(f"Prompt ID: {prompt_id}")
        status_data = poll_status(prompt_id)
        image_data = get_image(status_data, saved_file_key)
        jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 1440, 90)

    encoded_string = encode_image_to_base64(jpg_file_path)
    vision_prompt = f"Write upbeat Instagram description accompany this image, which was created by AI using the following prompt: {image_concept}"
    # NOTE(review): this description is never used - image_aftergen() builds
    # its own. Confirm whether `post`/`vision_prompt` were meant to be passed on.
    instagram_description = llava(encoded_string, vision_prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, vision_prompt, 150)
    image_aftergen(jpg_file_path, chosen_post, )
def image_aftergen(jpg_file_path: str, chosen_post: str = None, post: Dict = None, prompt: str = None):
    """
    Describe, title, document and optionally publish a generated image.

    Produces a description and a short title with the vision model (llava or
    gpt4v, depending on args), writes a .txt and a .md file next to the image,
    optionally sets the wallpaper, and - unless args.local is set - uploads to
    Instagram and/or Ghost.

    Args:
        jpg_file_path: Path of the generated JPEG.
        chosen_post: Post type name; used to look up the vision prompt and
            ghost tags in PROFILE_CONFIG when `prompt` / `post` are not given.
        post: Optional post configuration supplying 'ghost_tags'.
        prompt: Optional vision prompt overriding the configured one.
    """
    # Fall back to the vision prompt configured for the chosen post type.
    if chosen_post and not prompt:
        prompt = PROFILE_CONFIG['posts'][chosen_post]['Vision_Prompt']
    encoded_string = encode_image_to_base64(jpg_file_path)
    DEBUG(f"Image successfully encoded from {jpg_file_path}")
    instagram_description = llava(encoded_string, prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, prompt, 150)
    # Strip one pair of surrounding quotes the model may have wrapped its answer in.
    instagram_description = re.sub(r'^["\'](.*)["\']$', r'\1', instagram_description)
    ghost_tags = post['ghost_tags'] if post else PROFILE_CONFIG['posts'][chosen_post]['ghost_tags']
    title_prompt = f"Generate a short 3-5 word title for this image, which already includes the following description: {instagram_description}"
    # Generate img_title based on the condition provided
    img_title = llava(encoded_string, title_prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, title_prompt, 150)
    img_title = re.sub(r'^["\'](.*)["\']$', r'\1', img_title)
    # Save description to file and upload or save locally
    description_filename = jpg_file_path.rsplit('.', 1)[0] + ".txt"
    # NOTE(review): os.path.join discards IG_IMAGES_DIR when description_filename
    # is absolute - presumably jpg_file_path already lives under it; confirm.
    description_path = os.path.join(IG_IMAGES_DIR, description_filename)
    with open(description_path, "w") as desc_file:
        desc_file.write(instagram_description)
    # Initial markdown content creation
    markdown_filename = jpg_file_path.rsplit('.', 1)[0] + ".md"
    markdown_content = f"""# {img_title}
![{img_title}]({jpg_file_path})
---
{instagram_description}
---
Tags: {', '.join(ghost_tags)}
"""
    with open(markdown_filename, "w") as md_file:
        md_file.write(markdown_content)
    DEBUG(f"Markdown file created at {markdown_filename}")
    if args.wallpaper:
        change_wallpaper(jpg_file_path)
        DEBUG(f"Wallpaper changed.")
    # Publishing is skipped entirely in local mode.
    if not args.local:
        ig_footer = ""
        if not args.noig:
            post_url = upload_photo(jpg_file_path, instagram_description)
            DEBUG(f"Image posted at {post_url}")
            ig_footer = f"\n<a href=\"{post_url}\">Instagram link</a>"
        if not args.noghost:
            ghost_text = f"{instagram_description}"
            ghost_url = post_to_ghost(img_title, jpg_file_path, ghost_text, ghost_tags)
            # ig_footer is only logged here; it is not part of the Ghost post body.
            DEBUG(f"Ghost post: {ghost_url}\n{ig_footer}")
def choose_post(posts):
    """
    Pick a post type at random, weighted by each entry's 'frequency'.

    Args:
        posts: Mapping of post type name -> configuration with an integer
            'frequency' weight.

    Returns:
        The selected post type name.
    """
    total_weight = sum(info['frequency'] for info in posts.values())
    ticket = random.randint(1, total_weight)

    # Walk the cumulative weights until the drawn ticket falls into a slot.
    cumulative = 0
    for name, info in posts.items():
        cumulative += info['frequency']
        if ticket <= cumulative:
            return name
def load_json(json_payload, workflow):
    """
    Load JSON from a string payload or from a workflow file.

    Args:
        json_payload: JSON text; takes precedence when truthy.
        workflow: Workflow name (with or without ".json") inside
            SD_WORKFLOWS_DIR; used when no payload is given.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: when neither argument is provided.
    """
    if json_payload:
        return json.loads(json_payload)
    if not workflow:
        raise ValueError("No valid input provided.")

    file_name = workflow if workflow.endswith('.json') else f"{workflow}.json"
    with open(os.path.join(SD_WORKFLOWS_DIR, file_name), 'r') as workflow_file:
        return json.load(workflow_file)
def save_as_jpg(image_data, prompt_id, chosen_post:str = None, max_size=2160, quality=80):
    """
    Store raw image bytes as a size-limited JPEG under IG_IMAGES_DIR/<chosen_post>.

    The bytes are first written to a temporary "<prompt_id>.png", reopened,
    downscaled if the longest side exceeds `max_size`, saved as
    "<prompt_id>.jpg", and the temporary file is removed.

    Args:
        image_data: Raw image bytes.
        prompt_id: Used as the file stem.
        chosen_post: Sub-directory name; "custom" when falsy.
        max_size: Maximum length of the longest side in pixels.
        quality: JPEG quality setting.

    Returns:
        Path of the written JPEG, or None if any step failed.
    """
    if not chosen_post:
        chosen_post = "custom"
    category_dir = os.path.join(IG_IMAGES_DIR, chosen_post)
    png_path = os.path.join(category_dir, f"{prompt_id}.png")

    try:
        os.makedirs(category_dir, exist_ok=True)

        with open(png_path, 'wb') as png_file:
            png_file.write(image_data)

        with Image.open(png_path) as source:
            picture = source
            longest_side = max(source.size)
            if longest_side > max_size:
                scale = max_size / longest_side
                target_size = tuple([int(side * scale) for side in source.size])
                picture = source.resize(target_size, Image.Resampling.LANCZOS)

            jpg_path = os.path.join(category_dir, f"{prompt_id}.jpg")
            # JPEG has no alpha channel, hence the RGB conversion.
            picture.convert('RGB').save(jpg_path, format='JPEG', quality=quality)

        os.remove(png_path)
        return jpg_path
    except Exception as e:
        DEBUG(f"Error processing image: {e}")
        return None
def upload_photo(path, caption, title: str=None):
    """
    Upload a photo through `cl` and return the URL of the resulting post.

    Args:
        path: Image file to upload.
        caption: Caption text for the post.
        title: Accepted but not used.

    Returns:
        "https://www.instagram.com/p/<code>/" built from the uploaded media's code.
    """
    DEBUG(f"Uploading photo from {path}...")
    uploaded = cl.photo_upload(path, caption)
    return f"https://www.instagram.com/p/{uploaded.code}/"
def format_duration(seconds):
    """
    Render a duration given in seconds as a short human-readable string.

    Below 120 s the result is whole seconds, below 6400 s whole minutes,
    otherwise hours with two decimals.
    """
    if seconds < 120:
        whole_seconds = int(seconds)
        return f"{whole_seconds} sec"
    if seconds < 6400:
        whole_minutes = int(seconds // 60)
        return f"{whole_minutes} min"
    hours = seconds / 3600
    return f"{hours:.2f} hr"
########################
### HELPER FUNCTIONS ###
########################
import subprocess
def change_wallpaper(image_path):
    """
    Set the desktop picture via AppleScript (osascript / Finder).

    Args:
        image_path: Path of the image to use as wallpaper.

    The exit status of osascript is ignored; a missing osascript binary is
    logged instead of raised.
    """
    # Escape backslashes and double quotes so the path stays one AppleScript
    # string literal.
    escaped_path = str(image_path).replace("\\", "\\\\").replace('"', '\\"')
    script = f'tell application "Finder" to set desktop picture to POSIX file "{escaped_path}"'
    try:
        # An argv list (no shell) keeps quotes or other shell metacharacters
        # in the path from breaking or altering the command.
        subprocess.run(["osascript", "-e", script])
    except FileNotFoundError:
        # Without a shell a missing binary raises instead of just printing;
        # keep the previous best-effort behavior.
        DEBUG("osascript not found; cannot change wallpaper on this system.")
def sleep(seconds):
    """
    Pause for a randomized duration around `seconds`.

    Delegates to sleepupto() with the bounds 0.66 * seconds and 1.5 * seconds.
    """
    shortest = seconds*0.66
    longest = seconds*1.5
    sleepupto(shortest, longest)
def sleepupto(min_seconds, max_seconds=None):
    """
    Block for a random interval while showing a tqdm progress bar.

    Args:
        min_seconds: Lower bound, or the upper bound when `max_seconds` is None
            (the lower bound is then 0).
        max_seconds: Upper bound of the interval.

    The interval is drawn with random.uniform() and slept in steps of at most
    one second so the bar can be refreshed.
    """
    if max_seconds is None:
        low, high = 0, min_seconds
    else:
        low, high = min_seconds, max_seconds
    interval = random.uniform(low, high)

    started = time.time()
    deadline = started + interval
    with tqdm(total=interval, desc=f"Sleeping for {format_duration(interval)}", unit=" sec", ncols=75, bar_format='{desc}: {bar} {remaining}') as pbar:
        now = time.time()
        while now - started < interval:
            # Measured before sleeping, displayed after - as a rough countdown.
            time_left = deadline - now
            step = min(1, interval - (now - started))  # never overshoot the interval
            time.sleep(step)
            pbar.update(step)
            pbar.set_postfix_str(f"{format_duration(time_left)} remaining")
            now = time.time()
########################
### GHOST FUNCTIONS ###
########################
def generate_jwt_token():
    """
    Build a short-lived HS256 JWT from GHOST_API_KEY.

    GHOST_API_KEY is split at ':' into a key id and a hex-encoded secret. The
    token carries iat, exp (iat + 5 minutes) and aud '/admin/', with the key
    id in the 'kid' header.

    Returns:
        The encoded token as a str.
    """
    key_id, key_secret = GHOST_API_KEY.split(':')
    issued_at = int(date.now().timestamp())
    claims = {
        'iat': issued_at,
        'exp': issued_at + 5 * 60,
        'aud': '/admin/',
    }
    signing_key = bytes.fromhex(key_secret)
    token = jwt.encode(claims, signing_key, algorithm='HS256', headers={'kid': key_id})
    # Some jwt library versions return bytes; normalize to str.
    if isinstance(token, bytes):
        return token.decode('utf-8')
    return token
def post_to_ghost(title, image_path, html_content, ghost_tags):
    """
    Upload an image to Ghost and publish a post that embeds it.

    Args:
        title: Post title.
        image_path: Local image file to upload.
        html_content: HTML placed below the image.
        ghost_tags: Tags attached to the post.

    Returns:
        The URL of the created post.

    Raises:
        requests.HTTPError: when the upload or the post creation fails.
    """
    auth_headers = {'Authorization': f'Ghost {generate_jwt_token()}'}

    # Step 1: upload the image and learn its public URL.
    with open(image_path, 'rb') as image_file:
        upload = requests.post(
            f"{GHOST_API_URL}/images/upload/",
            headers=auth_headers,
            files={'file': (os.path.basename(image_path), image_file, 'image/jpg')},
        )
    upload.raise_for_status()
    image_url = upload.json()['images'][0]['url']

    # Step 2: wrap image + text in a single HTML card of a mobiledoc document.
    card_html = f'<img src="{image_url}" alt="Image"/><hr/> {html_content}'
    mobiledoc = json.dumps({
        "version": "0.3.1",
        "atoms": [],
        "cards": [["html", {"cardName": "html", "html": card_html}]],
        "markups": [],
        "sections": [[10, 0]]
    })

    # Step 3: create the post as published.
    new_post = {
        'title': title,
        'mobiledoc': mobiledoc,
        'status': 'published',
        'tags': ghost_tags
    }
    creation = requests.post(f"{GHOST_API_URL}/posts/", json={'posts': [new_post]}, headers=auth_headers)
    creation.raise_for_status()
    return creation.json()['posts'][0]['url']
########################################################
@ig.post("/ig/flow")
async def ig_flow_endpoint(new_session: bool = False):
    """
    Prepare the Instagram client session.

    Args:
        new_session: When False, a saved session at IG_SESSION_PATH is loaded
            if the file exists. When True, a fresh login with a TOTP code is
            performed and the session is saved.

    Raises:
        Exception: when neither path succeeds, including new_session=False
            with no saved session file.
    """
    # Position inside the current 30-second window, measured from rollover_time.
    now_unix = int(date.now().timestamp())
    seconds_left = 30 - ((now_unix - rollover_time) % 30)
    if seconds_left < 4:
        DEBUG("Too close to end of TOTP counter. Waiting.")
        sleepupto(5, 5)

    if not new_session and os.path.exists(IG_SESSION_PATH):
        cl.load_settings(IG_SESSION_PATH)
        DEBUG("Loaded past session.")
        return

    if new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now()):
        cl.dump_settings(IG_SESSION_PATH)
        DEBUG("Logged in and saved new session.")
        return

    raise Exception(f"Failed to login as {IG_USERNAME}.")

484
sijapi/routers/llm.py Normal file
View file

@ -0,0 +1,484 @@
#routers/llm.py
from fastapi import APIRouter, HTTPException, Request, Response
from fastapi.responses import StreamingResponse, JSONResponse
from starlette.responses import StreamingResponse
from datetime import datetime as dt_datetime
from dateutil import parser
from typing import List, Dict, Any, Union
from pydantic import BaseModel, root_validator, ValidationError
import aiofiles
import os
import glob
import chromadb
from openai import OpenAI
import uuid
import json
import base64
from pathlib import Path
import ollama
from ollama import AsyncClient as Ollama, list as OllamaList
import aiofiles
import time
import asyncio
from pathlib import Path
from fastapi import FastAPI, Request, HTTPException, APIRouter
from fastapi.responses import JSONResponse, StreamingResponse
from dotenv import load_dotenv
from sijapi import BASE_DIR, DATA_DIR, LOGS_DIR, CONFIG_DIR, LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi.utilities import convert_to_unix_time, sanitize_filename
# Router on which this module's endpoints are registered (see the @llm.* decorators).
llm = APIRouter()
# Initialize chromadb client
client = chromadb.Client()
# NOTE(review): this rebinds OBSIDIAN_CHROMADB_COLLECTION, replacing the value
# imported from `sijapi` above with a collection named "obsidian" created at
# import time - confirm the shadowing is intended.
OBSIDIAN_CHROMADB_COLLECTION = client.create_collection("obsidian")
def read_markdown_files(folder: Path):
    """
    Read every "*.md" file directly inside `folder` (no recursion).

    Args:
        folder: Directory to scan.

    Returns:
        A tuple (documents, file_paths): the UTF-8 decoded contents and the
        matching paths, in the order glob returned them.
    """
    md_paths = glob.glob(os.path.join(folder, "*.md"))

    def _slurp(md_path):
        with open(md_path, 'r', encoding='utf-8') as handle:
            return handle.read()

    return [_slurp(md_path) for md_path in md_paths], md_paths
# Read markdown files and generate embeddings
# NOTE: these are module-level statements, so they run when the module is
# imported: one ollama embedding request per markdown file found in DOC_DIR.
documents, file_paths = read_markdown_files(DOC_DIR)
for i, doc in enumerate(documents):
    response = ollama.embeddings(model="mxbai-embed-large", prompt=doc)
    embedding = response["embedding"]
    # The file path doubles as the document id inside the collection.
    OBSIDIAN_CHROMADB_COLLECTION.add(
        ids=[file_paths[i]],
        embeddings=[embedding],
        documents=[doc]
    )
@llm.get("/retrieve_document/{prompt}")
async def retrieve_document(prompt: str):
    """
    Return the stored document closest to `prompt`.

    The prompt is embedded with the "mxbai-embed-large" model and used to
    query OBSIDIAN_CHROMADB_COLLECTION for a single result.

    Returns:
        {"document": <text of the best match>}
    """
    prompt_embedding = ollama.embeddings(
        prompt=prompt,
        model="mxbai-embed-large"
    )["embedding"]
    matches = OBSIDIAN_CHROMADB_COLLECTION.query(
        query_embeddings=[prompt_embedding],
        n_results=1
    )
    # First (and only) hit of the first (and only) query.
    best_match = matches['documents'][0][0]
    return {"document": best_match}
@llm.get("/generate_response/{prompt}")
async def generate_response(prompt: str):
    """
    Answer `prompt` using the most relevant stored document as context.

    Fetches the document through retrieve_document() and passes it, together
    with the prompt, to the "llama2" model.

    Returns:
        {"response": <generated text>}
    """
    retrieved = await retrieve_document(prompt)
    augmented_prompt = f"Using this data: {retrieved['document']}. Respond to this prompt: {prompt}"
    completion = ollama.generate(model="llama2", prompt=augmented_prompt)
    return {"response": completion['response']}
async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, max_tokens: int = 200):
    """
    Send one system + user message pair to DEFAULT_LLM and return the reply text.

    Args:
        usr: User message.
        sys: System message.
        max_tokens: Passed to the model as the "num_predict" option.

    Returns:
        The reply's message content, or None when the response carries no
        message or no content.
    """
    conversation = [
        {"role": "system", "content": sys},
        {"role": "user", "content": usr},
    ]
    reply = await Ollama().chat(model=DEFAULT_LLM, messages=conversation, options={"num_predict": max_tokens})
    DEBUG(reply)

    if "message" not in reply:
        DEBUG("No choices found in response")
        return None
    if "content" in reply["message"]:
        return reply["message"]["content"]
    return None
def is_vision_request(content):
    # Placeholder that reports every request as non-vision.
    # NOTE(review): a second `is_vision_request` defined further down in this
    # module rebinds the name once the module has loaded, so this version is
    # shadowed - confirm it can be removed.
    return False
@llm.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """
    Chat-completions endpoint backed by Ollama.

    The request body is written to REQUESTS_DIR as a timestamped JSON file.
    If the latest message is a user message containing image content, the
    request is streamed through the "llava" model. Otherwise the requested
    model answers, streamed or as one JSON document depending on 'stream'.

    Raises:
        HTTPException: 400 when the body has no 'messages'.
    """
    body = await request.json()

    # Keep a copy of every incoming request on disk.
    stamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S%f")
    request_log = REQUESTS_DIR / f"request_{stamp}.json"
    async with aiofiles.open(request_log, mode='w') as file:
        await file.write(json.dumps(body, indent=4))

    messages = body.get('messages')
    if not messages:
        raise HTTPException(status_code=400, detail="Message data is required in the request body.")

    requested_model = body.get('model', 'default-model')
    DEBUG(f"Requested model: {requested_model}")
    stream = body.get('stream')
    token_limit = body.get('max_tokens') or body.get('num_predict')

    # Only the most recent message decides whether this is a vision request.
    recent_message = messages[-1]
    wants_vision = recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content'))

    if wants_vision:
        DEBUG("Processing as a vision request")
        model = "llava"
        DEBUG(f"Using model: {model}")
        return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json")

    DEBUG("Processing as a standard request")
    model = requested_model
    DEBUG(f"Using model: {model}")
    if stream:
        return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json")

    response_data = await generate_messages(messages, model)
    return JSONResponse(response_data, media_type="application/json")
async def stream_messages(messages: list, model: str = "llama3", num_predict: int = 300):
    """
    Stream a chat completion from Ollama as "data: ..." event lines.

    Args:
        messages: Chat history passed to the model.
        model: Model name.
        num_predict: Passed to the model as the 'num_predict' option.

    Yields:
        One "data: <json>\\n\\n" chunk per streamed part, shaped like a
        chat.completion.chunk object; on failure a single error chunk; always
        terminated by "data: [DONE]\\n\\n".
    """
    # NOTE(review): `async with Ollama()` needs the installed client to
    # implement the async context-manager protocol - confirm for the pinned
    # ollama version (other functions here use a plain `Ollama()`).
    async with Ollama() as async_client:
        try:
            index = 0
            # chat() is a coroutine: it must be awaited to obtain the async
            # iterator; iterating the coroutine itself raises a TypeError.
            part_stream = await async_client.chat(model=model, messages=messages, stream=True, options={'num_predict': num_predict})
            async for part in part_stream:
                yield "data: " + json.dumps({
                    "id": "chatcmpl-123",
                    "object": "chat.completion.chunk",
                    "created": int(time.time()),
                    "model": model,
                    "system_fingerprint": "fp_44709d6fcb",
                    "choices": [{
                        "index": index,
                        "delta": {"role": "assistant", "content": part['message']['content']},
                        "logprobs": None,
                        "finish_reason": None if 'finish_reason' not in part else part['finish_reason']
                    }]
                }) + "\n\n"
                index += 1
        except Exception as e:
            # Report the failure in-band so the client sees why the stream stopped.
            yield "data: " + json.dumps({"error": f"Error: {str(e)}"}) + "\n\n"
        yield "data: [DONE]\n\n"
async def stream_messages_with_vision(message: dict, model: str, num_predict: int = 300):
    """
    Stream a description of the images embedded in one chat message.

    Every content part of type 'image_url' whose URL is a base64 "data:image"
    URI is decoded and sent to the model with a fixed prompt.

    Args:
        message: Chat message whose 'content' is a list of parts.
        model: Vision model name.
        num_predict: Passed to the model as the 'num_predict' option.

    Yields:
        "data: <json>\\n\\n" chunks shaped like chat.completion.chunk objects;
        on failure a single error chunk; always terminated by
        "data: [DONE]\\n\\n".
    """
    async with Ollama() as async_client:
        try:
            # Non-list content (plain text) produces no chunks, only [DONE].
            if isinstance(message.get('content'), list):
                content = message['content']
                for part in content:
                    if part['type'] == 'image_url' and 'url' in part['image_url']:
                        image_url = part['image_url']['url']
                        # Only inline data URIs are handled; remote URLs are skipped.
                        if image_url.startswith('data:image'):
                            # Convert base64 to bytes
                            image_data = base64.b64decode(image_url.split('base64,')[1])
                            # The user's own text is not forwarded; the prompt is fixed.
                            response_generator = await async_client.generate(
                                model=model,
                                prompt='explain this image:',
                                images=[image_data],
                                stream=True,
                                options={'num_predict': num_predict}
                            )
                            index = 0
                            async for response in response_generator:
                                yield "data: " + json.dumps({
                                    "id": "chatcmpl-123",
                                    "object": "chat.completion.chunk",
                                    "created": int(time.time()),
                                    "model": model,
                                    "system_fingerprint": "fp_44709d6fcb",
                                    "choices": [{
                                        "index": index,
                                        "delta": {"role": "assistant", "content": response['response']},
                                        "logprobs": None,
                                        "finish_reason": None if 'finish_reason' not in response else response['finish_reason']
                                    }]
                                }) + "\n\n"
                                index += 1
        except Exception as e:
            # Report the failure in-band so the client sees why the stream stopped.
            yield "data: " + json.dumps({"error": f"Error: {str(e)}"}) + "\n\n"
        yield "data: [DONE]\n\n"
def get_appropriate_model(requested_model):
    """
    Map a requested model name to one that can be served.

    "gpt-4-vision-preview" maps to DEFAULT_VISION. A name that
    is_model_available() rejects maps to DEFAULT_LLM. Anything else is
    returned unchanged.
    """
    if requested_model == "gpt-4-vision-preview":
        return DEFAULT_VISION
    if is_model_available(requested_model):
        return requested_model
    return DEFAULT_LLM
def is_vision_request(content):
    """
    Tell whether message content includes an image part.

    Returns True only when `content` is a list holding at least one dict
    whose 'type' is 'image_url'.
    """
    if not isinstance(content, list):
        return False
    for part in content:
        if isinstance(part, dict) and part.get('type') == 'image_url':
            return True
    return False
@llm.get("/v1/models")
async def get_models():
    """
    List the models known to Ollama.

    Each model's id is its name up to the first ':' and 'created' is its
    modification time converted with convert_to_unix_time().
    """
    listing = OllamaList()
    formatted_models = [
        {
            "id": entry['name'].split(':')[0],
            "object": "model",
            "created": convert_to_unix_time(entry['modified_at']),
            "owned_by": "sij"
        }
        for entry in listing['models']
    ]
    return JSONResponse({
        "object": "list",
        "data": formatted_models
    })
async def generate_messages(messages: list, model: str = "llama3"):
    """
    Run a non-streaming chat completion and wrap the reply.

    Args:
        messages: Chat history passed to the model.
        model: Model name.

    Returns:
        {"model": ..., "choices": [{"message": {...}}]} on success, or
        {"error": "Error: ..."} when the call fails.
    """
    try:
        reply = await Ollama().chat(model=model, messages=messages, stream=False)
        assistant_message = {
            "role": "assistant",
            "content": reply['message']['content']
        }
        return {
            "model": model,
            "choices": [{"message": assistant_message}]
        }
    except Exception as e:
        return {"error": f"Error: {str(e)}"}
def is_model_available(model_name):
    """Report whether `model_name` matches a model listed by OllamaList().

    A listed model matches when its name equals `model_name` or starts with
    `model_name` followed by ':'. Multiple matches are logged as ambiguous
    but still count as available.
    """
    installed = [entry['name'] for entry in OllamaList()['models']]
    DEBUG(f"Available models: {installed}")
    hits = [name for name in installed if name == model_name or name.startswith(model_name + ':')]
    if not hits:
        ERR(f"No match found for model: {model_name}")
        return False
    if len(hits) == 1:
        DEBUG(f"Unique match found: {hits[0]}")
    else:
        ERR(f"Ambiguous match found, models: {hits}")
    return True
@llm.options("/chat/completions")
@llm.options("/v1/chat/completions")
async def chat_completions_options(request: Request):
    """Answer OPTIONS on the chat endpoints with usage help shaped like a completion."""
    usage_help = "To use the chat completions endpoint, make a POST request to /v1/chat/completions with a JSON payload containing the 'messages' array. Each message should have a 'role' (either 'system', 'user', or 'assistant') and 'content' (the message text). You can optionally specify the 'model' to use. The response will be a JSON object containing the generated completions."
    help_choice = {
        "index": 0,
        "message": {"role": "assistant", "content": usage_help},
        "finish_reason": "stop"
    }
    body = {
        "choices": [help_choice],
        "created": int(time.time()),
        "id": str(uuid.uuid4()),
        "model": DEFAULT_LLM,
        "object": "chat.completion.chunk",
    }
    response_headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Allow": "OPTIONS, POST",
    }
    return JSONResponse(content=body, status_code=200, headers=response_headers)
#### EMBEDDINGS
class EmbeddingRequest(BaseModel):
    """Request body for the embeddings endpoints.

    Text may arrive as `input` or as `prompt`; the pre-validator below
    normalises both to lists and moves `input` into `prompt` when `prompt`
    was not supplied.
    """
    model: str
    input: Union[str, List[str], None] = None
    prompt: Union[str, List[str], None] = None

    @root_validator(pre=True)
    def ensure_list(cls, values):
        raw_input = values.get('input')
        raw_prompt = values.get('prompt')
        # Wrap non-empty bare strings so downstream code always sees lists.
        if isinstance(raw_input, str) and raw_input:
            values['input'] = [raw_input]
        if isinstance(raw_prompt, str) and raw_prompt:
            values['prompt'] = [raw_prompt]
        # Only `input` was given: treat it as the prompt and clear `input`.
        if raw_input and not raw_prompt:
            values['prompt'] = values['input']
            values['input'] = None
        return values
class EmbeddingResponse(BaseModel):
    """Response body declared as the response_model of the embeddings endpoints."""
    object: str  # create_embedding sets this to "list"
    data: List[Dict[str, Any]]  # create_embedding returns one dict with "object", "index" and "embedding" keys
    model: str  # echoes the model name from the request
    usage: Dict[str, int]  # "prompt_tokens" / "total_tokens" counts
@llm.post("/api/embeddings", response_model=EmbeddingResponse)
@llm.post("/v1/embeddings", response_model=EmbeddingResponse)
async def create_embedding(request: EmbeddingRequest):
    """Embed the request text with Ollama and return an OpenAI-style embedding list.

    All prompt entries are joined with single spaces and embedded as one
    string, so the response always carries exactly one embedding.

    Raises:
        HTTPException: 422 when neither `input` nor `prompt` carries text,
            or when a ValidationError is raised while building the result;
            500 for any other failure.
    """
    # Both fields are optional on the model, so a request can arrive with no
    # text at all. Reject it as a client error instead of letting
    # " ".join(None) surface as a 500.
    if not request.prompt:
        raise HTTPException(status_code=422, detail="Either 'input' or 'prompt' must be provided.")
    try:
        combined_input = " ".join(request.prompt)
        response = ollama.embeddings(model=request.model, prompt=combined_input)
        embedding_list = response.get("embedding", [])
        data = [{
            "object": "embedding",
            "index": 0,
            "embedding": embedding_list
        }]
        result = {
            "object": "list",
            "data": data,
            "model": request.model,
            "usage": {"prompt_tokens": 5, "total_tokens": 5}  # Placeholder counts, not measured
        }
        return result
    except ValidationError as e:
        raise HTTPException(status_code=422, detail=e.errors())
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@llm.options("/api/embeddings")
@llm.options("/v1/embeddings")
async def options_embedding():
    """Answer OPTIONS on the embeddings endpoints with the allowed methods and headers."""
    allowed_methods = "OPTIONS, POST"
    response_headers = {
        "Allow": allowed_methods,
        "Content-Type": "application/json",
        "Access-Control-Allow-Methods": allowed_methods,
        "Access-Control-Allow-Headers": "Content-Type"
    }
    return JSONResponse(content={}, headers=response_headers)
###### PORTED FROM IGBOT, NEEDS TO BE UPDATED FOR THIS ENVIRONMENT AND MADE ASYNC: #####
def query_gpt4(llmPrompt: Union[List, None] = None, system_msg: str = "", user_msg: str = "", max_tokens: int = 150):
    """Send a chat request to OpenAI's "gpt-4" model and return the reply text.

    Args:
        llmPrompt: Ready-made list of chat messages. When empty or None, a
            two-message conversation is built from `system_msg` and `user_msg`.
        system_msg: System message used when `llmPrompt` is not given.
        user_msg: User message used when `llmPrompt` is not given.
        max_tokens: Completion token limit passed to the API.

    Returns:
        The content of the first choice, or "" when the response has no choices.
    """
    messages = llmPrompt if llmPrompt else [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_msg}
    ]
    LLM = OpenAI(api_key=OPENAI_API_KEY)
    response = LLM.chat.completions.create(
        model="gpt-4",
        messages=messages,
        max_tokens=max_tokens
    )
    if hasattr(response, "choices") and response.choices:
        first_choice = response.choices[0]
        if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
            return first_choice.message.content
        DEBUG("No content attribute in the first choice's message")
        DEBUG(f"No content found in message string: {response.choices}")
        DEBUG("Trying again!")
        # Pass max_tokens by keyword (positionally it would land in system_msg)
        # and return the retry's result instead of dropping it.
        return query_gpt4(messages, max_tokens=max_tokens)
    DEBUG(f"No content found in message string: {response}")
    return ""
def llava(image_base64, prompt):
    """Ask the local Ollama 'llava' model about an image.

    Returns the model's answer, or "" when the answer contains "pass"
    (case-insensitive).
    """
    client = Ollama(host='http://localhost:11434')
    framed_prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}"
    response = client.generate(model='llava', prompt=framed_prompt, images=[image_base64])
    DEBUG(response)
    answer = response["response"]
    if "pass" in answer.lower():
        return ""
    return answer
def _first_choice_content(response):
    """Return the text of the first choice of a chat completion, or None when it is missing or empty."""
    if response and response.choices:
        message = response.choices[0].message
        if message and message.content:
            return message.content
    return None


def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
    """Generate a caption/comment for an image with "gpt-4-vision-preview", then have the model refine it.

    A first request produces a draft; a second request replays the
    conversation with the draft and asks for a refined version.

    Args:
        image_base64: Base64-encoded image data, embedded in a data URL.
        prompt_sys: Text appended to the fixed system preamble.
        prompt_usr: User prompt sent alongside the image.
        max_tokens: Completion token limit for each request.

    Returns:
        The refined text, or "" when either the draft or the refined text
        contains "PASS". When a response carries no usable content the
        function waits 15 seconds and starts over.
    """
    VISION_LLM = OpenAI(api_key=OPENAI_API_KEY)
    # The system + user(image) opening is identical for both requests.
    opening_messages = [
        {
            "role": "system",
            "content": f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt_sys}",
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": f"{prompt_usr}"},
                {"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}}
            ],
        }
    ]
    response_1 = VISION_LLM.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=opening_messages,
        max_tokens=max_tokens,
        stream=False
    )
    comment_content = _first_choice_content(response_1)
    if comment_content:
        if "PASS" in comment_content:
            return ""
        DEBUG(f"Generated comment: {comment_content}")
        response_2 = VISION_LLM.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=opening_messages + [
                {
                    "role": "assistant",
                    "content": comment_content
                },
                {
                    "role": "user",
                    "content": "Please refine it, and remember to ONLY include the caption or comment, nothing else! That means no preface, no postscript, no notes, no reflections, and not even any acknowledgment of this follow-up message. I need to be able to use your output directly on social media. Do include emojis though."
                }
            ],
            max_tokens=max_tokens,
            stream=False
        )
        final_content = _first_choice_content(response_2)
        if final_content:
            DEBUG(f"Generated comment: {final_content}")
            return "" if "PASS" in final_content else final_content
    DEBUG("Vision response did not contain expected data.")
    DEBUG(f"Vision response: {response_1}")
    # This function is synchronous, so the pause must be a blocking sleep; an
    # un-awaited asyncio.sleep() only creates a coroutine and never waits.
    time.sleep(15)
    return gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)

540
sijapi/routers/locate.py Normal file
View file

@ -0,0 +1,540 @@
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import HTMLResponse, JSONResponse
import requests
import json
import time
import pytz
import traceback
from datetime import datetime, timezone
from typing import Union, List
import asyncio
import pytz
import folium
import time as timer
from pathlib import Path
from pydantic import BaseModel
from typing import Optional, Any, Dict, List, Union
from datetime import datetime, timedelta, time
from sijapi import LOCATION_OVERRIDES, TZ
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi.utilities import get_db_connection, haversine, localize_dt
# from osgeo import gdal
# import elevation
locate = APIRouter()
from pydantic import BaseModel
from typing import Optional, List, Dict, Any
from datetime import datetime
import requests
class Location(BaseModel):
    """A timestamped geographic point with optional address and reverse-geocoding details.

    Only latitude, longitude and datetime are required; every other field
    defaults to None.
    """
    latitude: float
    longitude: float
    datetime: datetime
    elevation: Optional[float] = None
    altitude: Optional[float] = None  # post_locate_endpoint uses this as the elevation when elevation is unset
    zip: Optional[str] = None
    street: Optional[str] = None
    city: Optional[str] = None
    state: Optional[str] = None
    country: Optional[str] = None
    context: Optional[Dict[str, Any]] = None  # free-form metadata; post_location reads 'action' and 'device_*' keys from it
    # The remaining fields are populated by reverse_geocode from the Nominatim response.
    class_: Optional[str] = None  # taken from the response's "class" key
    type: Optional[str] = None
    name: Optional[str] = None
    display_name: Optional[str] = None
    boundingbox: Optional[List[str]] = None
    amenity: Optional[str] = None
    house_number: Optional[str] = None
    road: Optional[str] = None
    quarter: Optional[str] = None
    neighbourhood: Optional[str] = None
    suburb: Optional[str] = None
    county: Optional[str] = None
    country_code: Optional[str] = None

    class Config:
        # Encode datetime values as ISO 8601 strings when serialising to JSON.
        json_encoders = {
            datetime: lambda dt: dt.isoformat(),
        }
def reverse_geocode(latitude: float, longitude: float) -> Optional[Location]:
    """Look up an address for the given coordinates via the Nominatim reverse API.

    Returns a Location built from the response (timestamped with the current
    time), or None when the HTTP request fails.
    """
    url = f"https://nominatim.openstreetmap.org/reverse?format=json&lat={latitude}&lon={longitude}"
    INFO(f"Calling Nominatim API at {url}")
    headers = {
        'User-Agent': 'sij.law/1.0 (sij@sij.law)',  # replace with your app name and email
    }
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Non-2xx responses become RequestException subclasses
        payload = response.json()
        address = payload.get("address", {})

        # Fields copied one-to-one from the address sub-object ...
        address_keys = (
            "city", "state", "country", "amenity", "house_number", "road",
            "quarter", "neighbourhood", "suburb", "county", "country_code",
        )
        fields = {key: address.get(key) for key in address_keys}
        # ... and from the top level of the response.
        for key in ("type", "name", "display_name", "boundingbox"):
            fields[key] = payload.get(key)

        location = Location(
            latitude=float(payload.get("lat", latitude)),
            longitude=float(payload.get("lon", longitude)),
            datetime=datetime.now(),  # You might want to adjust this based on your needs
            zip=address.get("postcode"),
            street=address.get("road"),
            context={},  # Initialize with an empty dict, to be filled as needed
            class_=payload.get("class"),
            **fields,
        )
        INFO(f"Created Location object: {location}")
        return location
    except requests.exceptions.RequestException as e:
        ERR(f"Error: {e}")
        return None
## NOT YET IMPLEMENTED
async def geocode(zip_code: Optional[str] = None, latitude: Optional[float] = None, longitude: Optional[float] = None, city: Optional[str] = None, state: Optional[str] = None, country_code: str = 'US') -> Optional[Location]:
    """Find the closest stored location matching coordinates, a zip code, or a city/state pair.

    Each criterion that is supplied becomes one condition; conditions are
    OR-ed together. When coordinates are given, rows are ordered by distance
    to that point; otherwise the distance column is NULL.

    NOTE(review): table and column names (Locations, geom, country, ...) are
    carried over unchanged from the previous query text and are not verified
    against the schema used by the other queries in this module.

    Returns:
        The matching Location, or None when the input is insufficient or no
        row matches.

    Raises:
        Exception: when the database query or result handling fails.
    """
    has_coords = latitude is not None and longitude is not None
    if not has_coords and (zip_code is None) and (city is None or state is None):
        ERR(f"Must provide sufficient information for geocoding!")
        return None

    conn = None
    try:
        conn = await get_db_connection()

        params = []

        def bind(value) -> str:
            """Register a query argument and return its positional placeholder."""
            params.append(value)
            return f"${len(params)}"

        # Placeholders are numbered as they are bound so they always line up
        # with `params`, whichever combination of criteria was supplied.
        conditions = []
        if has_coords:
            point = f"ST_SetSRID(ST_MakePoint({bind(longitude)}, {bind(latitude)}), 4326)"
            distance_expr = f"ST_Distance(geom, {point})"
            conditions.append(f"ST_DWithin(geom, {point}, 50000)")
        else:
            # No reference point: keep the column so ORDER BY stays valid.
            distance_expr = "NULL::double precision"
        if zip_code:
            conditions.append(f"(zip = {bind(zip_code)} AND country = {bind(country_code)})")
        if city and state:
            conditions.append(f"(city ILIKE {bind(city)} AND state ILIKE {bind(state)} AND country = {bind(country_code)})")

        query = f"SELECT id, street, city, state, country, latitude, longitude, zip, elevation, datetime, date, {distance_expr} AS distance FROM Locations"
        if conditions:
            query += " WHERE " + " OR ".join(conditions)
        query += " ORDER BY distance LIMIT 1;"

        DEBUG(f"Executing query: {query} with params: {params}")
        result = await conn.fetchrow(query, *params)

        if not result:
            DEBUG("No location found with provided parameters.")
            # Location() cannot be built without its required fields, so an
            # absent match is reported as None.
            return None

        # `distance` is not a Location field, so it is not passed on.
        location_info = Location(
            latitude=result['latitude'],
            longitude=result['longitude'],
            datetime=result['datetime'],
            zip=result['zip'],
            street=result.get('street', ''),
            city=result['city'],
            state=result['state'],
            country=result['country'],
            elevation=result.get('elevation', 0),
        )
        DEBUG(f"Found location: {location_info}")
        return location_info
    except Exception as e:
        ERR(f"Error occurred: {e}")
        raise Exception("An error occurred while processing your request") from e
    finally:
        # Release the connection on every path, including failures.
        if conn is not None:
            await conn.close()
def find_override_locations(lat: float, lon: float) -> Optional[str]:
    """Return the name of the nearest override location whose radius covers the point.

    Entries are read from the JSON file at LOCATION_OVERRIDES; each supplies
    "name", "latitude", "longitude" and "radius". Returns None when no entry
    is within its radius of (lat, lon).
    """
    with open(LOCATION_OVERRIDES, 'r') as file:
        overrides = json.load(file)

    best_name, best_distance = None, float('inf')
    for entry in overrides:
        name = entry.get("name")
        entry_lat = entry.get("latitude")
        entry_lon = entry.get("longitude")
        radius = entry.get("radius")
        separation = haversine(lat, lon, entry_lat, entry_lon)
        # Must lie inside the entry's radius and beat the best match so far.
        if separation <= radius and separation < best_distance:
            best_name, best_distance = name, separation
    return best_name
def get_elevation(latitude, longitude):
    """Query the Open-Elevation lookup API for the elevation at a coordinate.

    Returns the "elevation" of the first result, or None when the request
    fails or the response has no "results" key.
    """
    url = "https://api.open-elevation.com/api/v1/lookup"
    payload = {"locations": [{"latitude": latitude, "longitude": longitude}]}
    try:
        response = requests.post(url, json=payload)
        response.raise_for_status()  # Non-2xx responses become RequestException subclasses
        data = response.json()
    except requests.exceptions.RequestException as e:
        ERR(f"Error: {e}")
        return None
    if "results" not in data:
        return None
    return data["results"][0]["elevation"]
async def fetch_locations(start: datetime, end: datetime = None) -> List[Location]:
    """Fetch stored locations between `start` and `end`, newest first.

    When `end` is omitted the range runs to 23:59:59 on the start day. When
    both bounds fall exactly on midnight, the end bound is extended to
    23:59:59 of its day. If the range is empty and covers a single day, the
    most recent location before `start` is returned instead.

    Args:
        start: Beginning of the range (passed through localize_dt).
        end: Optional end of the range (passed through localize_dt).

    Returns:
        Location objects sorted by datetime descending; rows without
        coordinates are skipped. Empty list when nothing is found.
    """
    start_datetime = localize_dt(start)
    if end is None:
        end_datetime = localize_dt(start_datetime.replace(hour=23, minute=59, second=59))
    else:
        end_datetime = localize_dt(end)
        # Only meaningful when an end was supplied; running this check with
        # end=None would call .time() on None.
        if start_datetime.time() == datetime.min.time() and end.time() == datetime.min.time():
            end_datetime = end_datetime.replace(hour=23, minute=59, second=59)

    DEBUG(f"Fetching locations between {start_datetime} and {end_datetime}")
    conn = await get_db_connection()
    locations = []
    try:
        # Check for records within the specified datetime range
        range_locations = await conn.fetch('''
            SELECT id, datetime,
            ST_X(ST_AsText(location)::geometry) AS longitude,
            ST_Y(ST_AsText(location)::geometry) AS latitude,
            ST_Z(ST_AsText(location)::geometry) AS elevation,
            city, state, zip, street,
            action, device_type, device_model, device_name, device_os
            FROM locations
            WHERE datetime >= $1 AND datetime <= $2
            ORDER BY datetime DESC
            ''', start_datetime.replace(tzinfo=None), end_datetime.replace(tzinfo=None))
        DEBUG(f"Range locations query returned: {range_locations}")
        locations.extend(range_locations)

        # Single-day query with no hits: fall back to the last known location.
        if not locations and (end is None or start_datetime.date() == end.date()):
            location_data = await conn.fetchrow('''
                SELECT id, datetime,
                ST_X(ST_AsText(location)::geometry) AS longitude,
                ST_Y(ST_AsText(location)::geometry) AS latitude,
                ST_Z(ST_AsText(location)::geometry) AS elevation,
                city, state, zip, street,
                action, device_type, device_model, device_name, device_os
                FROM locations
                WHERE datetime < $1
                ORDER BY datetime DESC
                LIMIT 1
                ''', start_datetime.replace(tzinfo=None))
            DEBUG(f"Fallback query returned: {location_data}")
            if location_data:
                locations.append(location_data)
    finally:
        # Close the connection even when a query raises.
        await conn.close()

    DEBUG(f"Locations found: {locations}")
    sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True)
    return [
        Location(
            latitude=loc['latitude'],
            longitude=loc['longitude'],
            datetime=loc['datetime'],
            elevation=loc.get('elevation'),
            city=loc.get('city'),
            state=loc.get('state'),
            zip=loc.get('zip'),
            street=loc.get('street'),
            context={
                'action': loc.get('action'),
                'device_type': loc.get('device_type'),
                'device_model': loc.get('device_model'),
                'device_name': loc.get('device_name'),
                'device_os': loc.get('device_os')
            }
        )
        for loc in sorted_locations
        if loc['latitude'] is not None and loc['longitude'] is not None
    ]
# Function to fetch the last location before the specified datetime
async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
    """Return the most recent stored location strictly before the given moment.

    Args:
        datetime: Cut-off moment (passed through localize_dt; compared without tzinfo).

    Returns:
        The matching Location, or None when no earlier row exists.
    """
    cutoff = localize_dt(datetime)
    DEBUG(f"Fetching last location before {cutoff}")
    conn = await get_db_connection()
    try:
        location_data = await conn.fetchrow('''
            SELECT id, datetime,
            ST_X(ST_AsText(location)::geometry) AS longitude,
            ST_Y(ST_AsText(location)::geometry) AS latitude,
            ST_Z(ST_AsText(location)::geometry) AS elevation,
            city, state, zip, street, country,
            action
            FROM locations
            WHERE datetime < $1
            ORDER BY datetime DESC
            LIMIT 1
            ''', cutoff.replace(tzinfo=None))
    finally:
        # Close the connection even when the query raises.
        await conn.close()

    if location_data:
        DEBUG(f"Last location found: {location_data}")
        return Location(**location_data)
    DEBUG("No location found before the specified datetime")
    return None
@locate.get("/map/start_date={start_date_str}&end_date={end_date_str}", response_class=HTMLResponse)
async def generate_map_endpoint(start_date_str: str, end_date_str: str):
    """Render a map of the locations recorded between two dates; responds 400 on an unparsable date."""
    try:
        start_date, end_date = localize_dt(start_date_str), localize_dt(end_date_str)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid date format")
    return HTMLResponse(content=await generate_map(start_date, end_date))
@locate.get("/map", response_class=HTMLResponse)
async def generate_alltime_map_endpoint():
    """Render a map of every location recorded from 2022-01-01 up to now."""
    try:
        range_start = localize_dt(datetime.fromisoformat("2022-01-01"))
        range_end = localize_dt(datetime.now())
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid date format")
    page = await generate_map(range_start, range_end)
    return HTMLResponse(content=page)
async def generate_map(start_date: datetime, end_date: datetime):
    """Build a folium map with one marker per location in the date range and return it as HTML.

    Args:
        start_date: Start of the range passed to fetch_locations.
        end_date: End of the range passed to fetch_locations.

    Returns:
        The rendered HTML document as a string.

    Raises:
        HTTPException: 404 when no locations exist in the range.
    """
    locations = await fetch_locations(start_date, end_date)
    if not locations:
        raise HTTPException(status_code=404, detail="No locations found for the given date range")

    # Centre the map on the first location returned
    map_center = [locations[0].latitude, locations[0].longitude]
    m = folium.Map(location=map_center, zoom_start=5)

    for loc in locations:
        folium.Marker(
            location=[loc.latitude, loc.longitude],
            popup=f"{loc.city}, {loc.state}<br>Elevation: {loc.elevation}m<br>Date: {loc.datetime}",
            tooltip=f"{loc.city}, {loc.state}"
        ).add_to(m)

    # Render in memory. Writing to a fixed "map.html" and reading it back let
    # concurrent requests overwrite each other's file and decoded the bytes
    # with the platform default encoding.
    return m.get_root().render()
async def post_location(location: Location):
    """Insert one location row and return the stored values.

    Device/action details are read from `location.context`; missing keys
    default to 'manual' (action) and 'Unknown' (device fields).

    Args:
        location: The location to store; its datetime is passed through localize_dt.

    Returns:
        A dict of the inserted values, or None when the insert fails (the
        error and traceback are logged).
    """
    DEBUG(f"post_location called with {location.datetime}")
    conn = await get_db_connection()
    try:
        context = location.context or {}
        action = context.get('action', 'manual')
        device_type = context.get('device_type', 'Unknown')
        device_model = context.get('device_model', 'Unknown')
        device_name = context.get('device_name', 'Unknown')
        device_os = context.get('device_os', 'Unknown')

        # Parse and localize the datetime
        localized_datetime = localize_dt(location.datetime)

        await conn.execute('''
            INSERT INTO locations (datetime, location, city, state, zip, street, action, device_type, device_model, device_name, device_os)
            VALUES ($1, ST_SetSRID(ST_MakePoint($2, $3, $4), 4326), $5, $6, $7, $8, $9, $10, $11, $12, $13)
            ''', localized_datetime, location.longitude, location.latitude, location.elevation, location.city, location.state, location.zip, location.street, action, device_type, device_model, device_name, device_os)
        INFO(f"Successfully posted location: {location.latitude}, {location.longitude} on {localized_datetime}")
        return {
            'datetime': localized_datetime,
            'latitude': location.latitude,
            'longitude': location.longitude,
            'city': location.city,
            'state': location.state,
            'zip': location.zip,
            'street': location.street,
            'elevation': location.elevation,
            'action': action,
            'device_type': device_type,
            'device_model': device_model,
            'device_name': device_name,
            'device_os': device_os
        }
    except Exception as e:
        ERR(f"Error posting location {e}")
        ERR(traceback.format_exc())
        return None
    finally:
        # Release the connection on failure as well as on success.
        await conn.close()
@locate.post("/locate")
async def post_locate_endpoint(locations: Union[Location, List[Location]]):
    """Store one location or a list of locations and report what was saved.

    Missing elevation is filled from `altitude` or, failing that, from
    get_elevation(); a missing context gets default values. Entries that
    fail to store are left out of the results.
    """
    batch = [locations] if isinstance(locations, Location) else locations
    stored = []

    for location in batch:
        if not location.datetime:
            location.datetime = datetime.now(timezone.utc).isoformat()

        if not location.elevation:
            location.elevation = location.altitude or get_elevation(location.latitude, location.longitude)

        # Ensure context is a dictionary with default values if not provided
        if not location.context:
            location.context = {
                "action": "manual",
                "device_type": "Pythonista",
                "device_model": "Unknown",
                "device_name": "Unknown",
                "device_os": "Unknown"
            }

        DEBUG(f"datetime before localization: {location.datetime}")
        # Normalise to a timezone-aware datetime before storing
        location.datetime = localize_dt(location.datetime)
        DEBUG(f"datetime after localization: {location.datetime}")

        saved = await post_location(location)
        if saved:
            stored.append({"location_data": saved})  # Add weather data if necessary

        await asyncio.sleep(0.1)  # Brief non-blocking pause between entries

    return {"message": "Locations and weather updated", "results": stored}
# GET endpoint to fetch the last location before the specified datetime
# @locate.get("/last_location", response_model=Union[Location, Dict[str, str]])
@locate.get("/locate", response_model=List[Location])
async def get_last_location() -> JSONResponse:
    """Return the most recent stored location before the current time, or a 404 message when there is none."""
    query_datetime = datetime.now(TZ)
    DEBUG(f"Query_datetime: {query_datetime}")

    location = await fetch_last_location_before(query_datetime)
    if not location:
        return JSONResponse(content={"message": "No location found before the specified datetime"}, status_code=404)

    DEBUG(f"location: {location}")
    body = location.model_dump()  # use model_dump instead of dict
    # datetime objects are not JSON-serialisable as-is; send an ISO 8601 string.
    body["datetime"] = location.datetime.isoformat()
    return JSONResponse(content=body)
@locate.get("/locate/{datetime_str}", response_model=List[Location])
async def get_locate(datetime_str: str, all: bool = False):
    """Return the location(s) recorded around the given date/time.

    Args:
        datetime_str: Date or date-time string parsed by localize_dt.
        all: When true, return every location found; otherwise only the first (most recent).

    Raises:
        HTTPException: 400 when `datetime_str` cannot be parsed; 404 when no
            location data is found.
    """
    try:
        date_time = localize_dt(datetime_str)
    except ValueError as e:
        ERR(f"Invalid datetime string provided: {datetime_str}")
        # A list of strings does not satisfy response_model=List[Location];
        # report the bad input as a client error instead.
        raise HTTPException(
            status_code=400,
            detail="ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."
        ) from e

    locations = await fetch_locations(date_time)
    if not locations:
        raise HTTPException(status_code=404, detail="No nearby data found for this date and time")
    return locations if all else [locations[0]]
# Draft of an SRTM/GDAL-based elevation lookup, kept as an inert string rather
# than live code (the gdal import at the top of this module is commented out).
future_elevation = """
def get_elevation_srtm(latitude, longitude, srtm_file):
try:
# Open the SRTM dataset
dataset = gdal.Open(srtm_file)
# Get the geotransform and band information
geotransform = dataset.GetGeoTransform()
band = dataset.GetRasterBand(1)
# Calculate the pixel coordinates from the latitude and longitude
x = int((longitude - geotransform[0]) / geotransform[1])
y = int((latitude - geotransform[3]) / geotransform[5])
# Read the elevation value from the SRTM dataset
elevation = band.ReadAsArray(x, y, 1, 1)[0][0]
# Close the dataset
dataset = None
return elevation
except Exception as e:
ERR(f"Error: {e}")
return None
"""
def get_elevation2(latitude: float, longitude: float) -> float:
    """Look up elevation in meters from the USGS Elevation Point Query Service.

    Raises:
        RuntimeError: on any failure (network error, unexpected response shape, non-numeric value).
    """
    url = f"https://nationalmap.gov/epqs/pqs.php?x={longitude}&y={latitude}&units=Meters&output=json"
    try:
        data = requests.get(url).json()
        query_result = data["USGS_Elevation_Point_Query_Service"]["Elevation_Query"]
        return float(query_result["Elevation"])
    except Exception as e:
        # Handle exceptions (e.g., network errors, API changes) uniformly
        raise RuntimeError(f"Error getting elevation data: {str(e)}")

1081
sijapi/routers/note.py Normal file

File diff suppressed because it is too large Load diff

16
sijapi/routers/rag.py Normal file
View file

@ -0,0 +1,16 @@
'''
IN DEVELOPMENT: Retrieval-Augmented Generation module.
NOTES: Haven't yet decided whether this should depend on the Obsidian and Chat modules, whether they should depend on it, or some combination of the two.
'''
from fastapi import APIRouter
rag = APIRouter()
# The leading '@' is required: a bare `rag.get(...)` call builds the route
# decorator and throws it away, so the endpoint is never registered.
@rag.get("/rag/search")
async def rag_search_endpoint(query: str, scope: str):
    """Placeholder for the RAG search endpoint; not implemented yet."""
    pass
# The leading '@' is required: a bare `rag.post(...)` call builds the route
# decorator and throws it away, so the endpoint is never registered.
@rag.post("/rag/embed")
async def rag_upload_endpoint(path: str):
    """Placeholder for the RAG embed/upload endpoint; not implemented yet."""
    pass

455
sijapi/routers/sd.py Normal file
View file

@ -0,0 +1,455 @@
'''
Image generation module using StableDiffusion and similar models by way of ComfyUI.
DEPENDS ON:
LLM module
COMFYUI_URL, COMFYUI_DIR, COMFYUI_OUTPUT_DIR, HOST_PORT, TS_SUBNET, TS_ADDRESS, DATA_DIR, SD_CONFIG_DIR, SD_IMAGE_DIR, SD_WORKFLOWS_DIR, LOCAL_HOSTS, BASE_URL, PHOTOPRISM_USER*, PHOTOPRISM_URL*, PHOTOPRISM_PASS*
*unimplemented.
'''
from fastapi import APIRouter, Request, Response, Query
from starlette.datastructures import Address
from fastapi.responses import JSONResponse, RedirectResponse
from fastapi.staticfiles import StaticFiles
from aiohttp import ClientSession, ClientTimeout
import aiofiles
from PIL import Image
from pathlib import Path
import uuid
import json
import ipaddress
import socket
import subprocess
import os, re, io
import random
from io import BytesIO
import base64
import asyncio
import shutil
# from photoprism.Session import Session
# from photoprism.Photo import Photo
# from webdav3.client import Client
from sijapi.routers.llm import query_ollama
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import COMFYUI_URL, COMFYUI_LAUNCH_CMD, COMFYUI_DIR, COMFYUI_OUTPUT_DIR, HOST_PORT, TS_SUBNET, SD_CONFIG, SD_IMAGE_DIR, SD_WORKFLOWS_DIR, LOCAL_HOSTS, BASE_URL
sd = APIRouter()
CLIENT_ID = str(uuid.uuid4())
@sd.post("/sd")
@sd.post("/v1/images/generations")
async def sd_endpoint(request: Request):
    """Generate an image from a JSON request and return where it can be found.

    Reads "prompt", "model" (used as the scene name), "size", "style"
    (default "photorealistic") and "earlyurl" from the body. A truthy
    "earlyurl" asks workflow() for a web URL; otherwise the value is whatever
    path workflow() returns.

    Returns:
        JSONResponse of the form {"image_url": <str>}.
    """
    request_data = await request.json()
    prompt = request_data.get("prompt")
    model = request_data.get("model")
    size = request_data.get("size")
    style = request_data.get("style") or "photorealistic"
    earlyurl = request_data.get("earlyurl", None)
    earlyout = "web" if earlyurl else None

    image_path = await workflow(prompt=prompt, scene=model, size=size, style=style, earlyout=earlyout)

    # Outside "web" mode workflow() returns a pathlib.Path, which JSONResponse
    # cannot serialise; str() is a no-op for the URL returned in "web" mode.
    return JSONResponse({"image_url": str(image_path)})
@sd.get("/sd")
@sd.get("/v1/images/generations")
async def sd_endpoint(
    request: Request,
    prompt: str = Query(..., description="The prompt for image generation")
):
    """Start a "wallpaper" scene generation for `prompt` and redirect (303) to the image URL."""
    earlyout = "web"
    image_url = await workflow(prompt=prompt, scene="wallpaper", earlyout="web")
    if earlyout != "web":
        return JSONResponse({"image_url": image_url})
    return RedirectResponse(url=image_url, status_code=303)
# Strong references to fire-and-forget render tasks. The event loop only keeps
# weak references to tasks, so an unreferenced task may be garbage-collected
# before it finishes.
_background_render_tasks = set()


async def workflow(prompt: str, scene: str = None, size: str = None, style: str = "photorealistic", earlyout: str = "local", destination_path: str = None, downscale_to_fit: bool = False):
    """Turn a text prompt into a queued ComfyUI job and return where the JPEG will be.

    The scene (chosen by name, else by trigger words in the prompt) supplies
    the LLM instructions, prompt fragments and candidate workflows. The LLM
    expands the prompt into an image concept, which is inserted into a
    randomly chosen workflow and queued.

    Args:
        prompt: The user's description of the desired image.
        scene: Optional scene name; when unknown, a scene is matched from the prompt.
        size: Optional "WIDTHxHEIGHT"; defaults to the workflow's size or '1024x1024'.
        style: Accepted for interface compatibility; not used in this function.
        earlyout: Falsy to wait for the finished image; any other value starts
            rendering in the background. "web" makes the return value a URL.
        destination_path: Optional output path (suffix forced to ".jpg").
        downscale_to_fit: When true, the saved image is limited to max(width, height).

    Returns:
        A web URL when `earlyout == "web"`, otherwise the destination Path.
    """
    scene_data = get_scene(scene)
    if not scene_data:
        scene_data = get_matching_scene(prompt)
    prompt = scene_data['llm_pre_prompt'] + prompt
    image_concept = await query_ollama(usr=prompt, sys=scene_data['llm_sys_msg'], max_tokens=100)

    scene_workflow = random.choice(scene_data['workflows'])
    if size:
        DEBUG(f"Specified size: {size}")

    size = size if size else scene_workflow.get('size', '1024x1024')
    width, height = map(int, size.split('x'))
    DEBUG(f"Parsed width: {width}; parsed height: {height}")

    workflow_path = Path(SD_WORKFLOWS_DIR) / scene_workflow['workflow']
    workflow_data = json.loads(workflow_path.read_text())

    post = {
        "API_PPrompt": scene_data['API_PPrompt'] + image_concept + ', '.join(f"; (({trigger}))" for trigger in scene_data['triggers']),
        "API_SPrompt": scene_data['API_SPrompt'],
        "API_NPrompt": scene_data['API_NPrompt'],
        "width": width,
        "height": height
    }

    saved_file_key = await update_prompt_and_get_key(workflow=workflow_data, post=post, positive=image_concept)
    print(f"Saved file key: {saved_file_key}")

    prompt_id = await queue_prompt(workflow_data)
    print(f"Prompt ID: {prompt_id}")

    max_size = max(width, height) if downscale_to_fit else None
    destination_path = Path(destination_path).with_suffix(".jpg") if destination_path else SD_IMAGE_DIR / f"{prompt_id}.jpg"

    if not earlyout:
        await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)
    else:
        task = asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path))
        # Hold the task until it completes, then drop the reference.
        _background_render_tasks.add(task)
        task.add_done_callback(_background_render_tasks.discard)

    await asyncio.sleep(0.5)
    return get_web_path(destination_path) if earlyout == "web" else destination_path
async def generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path):
    """Wait for a queued job to finish, then fetch its image and save it as a JPEG.

    Any failure is printed and swallowed, so this can run as a background task.
    """
    try:
        job_status = await poll_status(prompt_id)
        raw_image = await get_image(job_status, saved_file_key)
        written_path = await save_as_jpg(
            raw_image,
            prompt_id,
            quality=90,
            max_size=max_size,
            destination_path=destination_path,
        )
        # The saver derives its own output path; log if it disagrees with ours.
        if Path(written_path) != Path(destination_path):
            ERR(f"Mismatch between jpg_file_path, {written_path}, and detination_path, {destination_path}")
    except Exception as e:
        print(f"Error in generate_and_save_image: {e}")
        return None
def get_web_path(file_path: Path) -> str:
    """Map a file under SD_IMAGE_DIR to its public URL beneath BASE_URL/img/."""
    relative = file_path.relative_to(SD_IMAGE_DIR)
    return f"{BASE_URL}/img/{relative}"
async def poll_status(prompt_id):
    """Poll ComfyUI's history endpoint once per second until the job is marked completed; return its job data."""
    loop = asyncio.get_event_loop()
    started = loop.time()
    history_url = f"{COMFYUI_URL}/history/{prompt_id}"

    async with ClientSession() as session:
        while True:
            elapsed_time = int(loop.time() - started)
            async with session.get(history_url) as response:
                if response.status != 200:
                    raise Exception("Failed to get job status")
                history = await response.json()
                job = history.get(prompt_id, {})
                finished = job.get("status", {}).get("completed", False)
                if finished:
                    print(f"{prompt_id} completed in {elapsed_time} seconds.")
                    return job
            await asyncio.sleep(1)
async def get_image(status_data, key):
    """Read the bytes of the first image that the job recorded under output node `key`.

    Raises:
        Exception: "Failed to get image: ..." when the output lists no images
            or the file cannot be read.
    """
    try:
        images = status_data.get("outputs", {}).get(key, {}).get("images", [])
        if not images:
            raise Exception("No images found in the job output.")

        first = images[0]
        file_path = os.path.join(
            COMFYUI_OUTPUT_DIR,
            first.get("subfolder", ""),
            first.get("filename"),
        )
        async with aiofiles.open(file_path, 'rb') as handle:
            return await handle.read()
    except Exception as e:
        raise Exception(f"Failed to get image: {e}")
async def save_as_jpg(image_data, prompt_id, max_size = None, quality = 100, destination_path: Path = None):
    """Write image bytes to disk as a JPEG, optionally downscaling, and return the JPEG path as a string.

    The bytes are first written to a temporary PNG named after `prompt_id`
    under SD_IMAGE_DIR, re-encoded to JPEG, and the PNG is then removed.
    Returns None (after printing the error) when anything fails.
    """
    default_stem = SD_IMAGE_DIR / prompt_id
    png_path = default_stem.with_suffix(".png")
    if destination_path:
        jpg_path = destination_path.with_suffix(".jpg")
    else:
        jpg_path = default_stem.with_suffix(".jpg")

    try:
        for target in (png_path, jpg_path):
            target.parent.mkdir(parents=True, exist_ok=True)

        # Persist the raw bytes so PIL can open them from disk
        async with aiofiles.open(png_path, 'wb') as png_file:
            await png_file.write(image_data)

        with Image.open(png_path) as img:
            longest_side = max(img.size) if max_size else None
            if max_size and longest_side > max_size:
                # Scale both sides by the same factor so the longest side fits max_size
                ratio = max_size / max(img.size)
                img = img.resize(tuple(int(side * ratio) for side in img.size), Image.Resampling.LANCZOS)
            img.convert('RGB').save(jpg_path, format='JPEG', quality=quality)

        # The intermediate PNG is no longer needed
        os.remove(png_path)
        return str(jpg_path)
    except Exception as e:
        print(f"Error processing image: {e}")
        return None
def set_presets(workflow_data, preset_values):
    """Write a randomly chosen preset value into one input of a workflow node, in place.

    `preset_values` supplies 'node' (node id), 'key' (input name) and
    'values' (candidates to pick from). Nothing is changed, and a debug
    message is logged, when the presets are missing or incomplete or the
    node has no 'inputs'.
    """
    if not preset_values:
        DEBUG("No preset_values found")
        return

    node_id = preset_values.get('node')
    input_name = preset_values.get('key')
    candidates = preset_values.get('values')
    if not (node_id and input_name and candidates):
        DEBUG("Required data missing in preset_values")
        return

    chosen = random.choice(candidates)
    if 'inputs' not in workflow_data.get(node_id, {}):
        DEBUG("Node not found in workflow_data")
        return
    workflow_data[node_id]['inputs'][input_name] = chosen
def get_return_path(destination_path):
    """Return just the file name for files directly inside SD_IMAGE_DIR, otherwise the full path as a string."""
    image_root = Path(SD_IMAGE_DIR)
    in_image_root = destination_path.parent.samefile(image_root)
    return destination_path.name if in_image_root else str(destination_path)
# This allows selected scenes by name
def get_scene(scene):
    """Return the first SD_CONFIG scene whose 'scene' name equals `scene`, or None when there is none."""
    matches = (entry for entry in SD_CONFIG['scenes'] if entry['scene'] == scene)
    return next(matches, None)
def get_matching_scene(prompt):
    """Pick the scene whose trigger words occur most often in ``prompt``.

    Each trigger of each scene in ``SD_CONFIG['scenes']`` is tested with a
    substring check against the lower-cased prompt. The scene with the
    highest number of matching triggers wins; on a tie the earlier scene is
    kept. When no trigger matches at all, the first configured scene is
    returned, so the first scene should be a sensible default.
    """
    haystack = prompt.lower()
    best_scene = None
    best_hits = 0
    for candidate in SD_CONFIG['scenes']:
        hits = len([word for word in candidate['triggers'] if word in haystack])
        if hits > best_hits:
            best_scene, best_hits = candidate, hits
    if best_scene:
        return best_scene
    # fall back on first scene, which should be an appropriate default scene.
    return SD_CONFIG['scenes'][0]
import asyncio
import socket
import subprocess
from typing import Optional
async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
    """
    Ensures that ComfyUI is running, starting it if necessary.

    A TCP connection to 127.0.0.1:8188 is attempted up to ``retries`` times.
    After the first failed attempt a tmux split-window is spawned that
    launches ComfyUI; every failed attempt is followed by a pause of
    ``timeout`` seconds.

    Args:
        retries (int): Number of connection attempts. Defaults to 4.
        timeout (float): Time to wait between attempts in seconds. Defaults to 6.0.

    Raises:
        RuntimeError: If ComfyUI couldn't be started or connected to after all retries.
    """
    for attempt in range(retries):
        try:
            # NOTE: this is a blocking connect (max 2 s) inside a coroutine.
            with socket.create_connection(("127.0.0.1", 8188), timeout=2):
                print("ComfyUI is already running.")
                return
        except OSError:
            # OSError covers socket.timeout and ConnectionRefusedError as well
            # as other connection failures (reset, unreachable, ...), which
            # should be retried rather than escape as an unrelated exception.
            if attempt == 0:  # Only try to start ComfyUI on the first failed attempt
                print("ComfyUI is not running. Starting it now...")
                try:
                    tmux_command = (
                        "tmux split-window -h "
                        "\"source /Users/sij/.zshrc; cd /Users/sij/workshop/ComfyUI; "
                        "mamba activate comfyui && "
                        "python main.py; exec $SHELL\""
                    )
                    subprocess.Popen(tmux_command, shell=True)
                    print("ComfyUI started in a new tmux session.")
                except Exception as e:
                    raise RuntimeError(f"Error starting ComfyUI: {e}") from e
            print(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...")
            await asyncio.sleep(timeout)
    raise RuntimeError(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
# async def upload_and_get_shareable_link(image_path):
# try:
# Set up the PhotoPrism session
# pp_session = Session(PHOTOPRISM_USER, PHOTOPRISM_PASS, PHOTOPRISM_URL, use_https=True)
# pp_session.create()
# Start import
# photo = Photo(pp_session)
# photo.start_import(path=os.path.dirname(image_path))
# Give PhotoPrism some time to process the upload
# await asyncio.sleep(5)
# Search for the uploaded photo
# photo_name = os.path.basename(image_path)
# search_results = photo.search(query=f"name:{photo_name}", count=1)
# if search_results['photos']:
# photo_uuid = search_results['photos'][0]['uuid']
# shareable_link = f"https://{PHOTOPRISM_URL}/p/{photo_uuid}"
# return shareable_link
# else:
# ERR("Could not find the uploaded photo details.")
# return None
# except Exception as e:
# ERR(f"Error in upload_and_get_shareable_link: {e}")
# return None
@sd.get("/image/{prompt_id}")
async def get_image_status(prompt_id: str):
    """Report the generated image for ``prompt_id`` or its pending status.

    The status returned by ``poll_status`` is scanned for the first output
    node that contains an ``"images"`` entry. If one exists, the image is
    fetched, saved as JPG and its public URL is returned. If none exists
    yet, a 202 response carrying the raw status data is returned.

    A 500 response is returned when ``save_as_jpg`` reports failure by
    returning ``None``; previously the URL was handed out even though no
    file had been written.
    """
    status_data = await poll_status(prompt_id)
    save_image_key = next(
        (key for key, value in status_data.get("outputs", {}).items() if "images" in value),
        None,
    )
    if not save_image_key:
        return JSONResponse(content={"status": "Processing", "details": status_data}, status_code=202)

    image_data = await get_image(status_data, save_image_key)
    saved_path = await save_as_jpg(image_data, prompt_id)
    if saved_path is None:
        # Do not advertise a URL for a file that could not be written.
        return JSONResponse(content={"error": "Failed to save generated image"}, status_code=500)
    external_url = f"https://api.lone.blue/img/{prompt_id}.jpg"
    return JSONResponse({"image_url": external_url})
@sd.get("/image-status/{prompt_id}")
async def get_image_processing_status(prompt_id: str):
    """Return the raw polling status for ``prompt_id``.

    Responds with status 200 and the data from ``poll_status`` under
    ``"details"``; any exception is converted into a 500 response whose
    ``"error"`` field holds the exception text.
    """
    try:
        payload = {"status": "Processing", "details": await poll_status(prompt_id)}
        return JSONResponse(content=payload, status_code=200)
    except Exception as exc:
        failure = {"error": str(exc)}
        return JSONResponse(content=failure, status_code=500)
@sd.options("/v1/images/generations", tags=["generations"])
async def get_generation_options():
    """Describe the parameters accepted by the image generation endpoint.

    Returns a mapping from parameter name to a small description record
    (description, type and, where given, required/default/example).
    """
    options = {}
    options["model"] = dict(
        description="The model to use for image generation.",
        type="string",
        example="stable-diffusion",
    )
    options["prompt"] = dict(
        description="The text prompt for the image generation.",
        type="string",
        required=True,
        example="A beautiful sunset over the ocean.",
    )
    options["n"] = dict(
        description="The number of images to generate.",
        type="integer",
        default=1,
        example=3,
    )
    options["size"] = dict(
        description="The size of the generated images in 'widthxheight' format.",
        type="string",
        default="1024x1024",
        example="512x512",
    )
    options["style"] = dict(
        description="The style for the generated images.",
        type="string",
        default="photorealistic",
        example="cartoon",
    )
    options["raw"] = dict(
        description="Whether to return raw image data or not.",
        type="boolean",
        default=False,
    )
    options["earlyurl"] = dict(
        description="Whether to return the URL early or wait for the image to be ready.",
        type="boolean",
        default=False,
    )
    return options
async def load_workflow(workflow_path: str, workflow:str):
    """Load a workflow definition from a JSON file.

    When ``workflow_path`` is truthy it is used as is. Otherwise the file
    is looked up in ``SD_WORKFLOWS_DIR`` under the name ``workflow``, with
    ``.json`` appended unless the name already ends with it.

    Returns the parsed JSON content.
    """
    if not workflow_path:
        file_name = workflow if workflow.endswith('.json') else f"{workflow}.json"
        workflow_path = os.path.join(SD_WORKFLOWS_DIR, file_name)
    with open(workflow_path, 'r') as handle:
        return json.loads(handle.read())
async def update_prompt_and_get_key(workflow: dict, post: dict, positive: str):
    '''
    Walk the workflow and substitute its dynamic placeholder values in place.

    - a value equal to "API_PPrompt" becomes post["API_PPrompt"] + positive;
    - a value equal to "API_SPrompt" or "API_NPrompt" becomes the matching
      entry of ``post`` (empty string when absent);
    - the keys "seed" and "noise_seed" receive a random 13-digit integer;
    - dimension-like keys whose current value is the sentinel 1023 receive
      post["width"], those holding the sentinel 1025 receive post["height"]
      (both default to 1024).

    Even more important, it finds and returns the key of the SaveImage node
    whose filename_prefix is "API_", which is needed to decipher the status
    when generation is complete. Returns None when no such node exists.
    '''
    seed_keys = ("seed", "noise_seed")
    dimension_keys = (
        "width", "max_width", "scaled_width", "height", "max_height", "scaled_height",
        "side_length", "size", "value", "dimension", "dimensions", "long", "long_side",
        "short", "short_side", "length",
    )
    save_node_key = None

    def _is_api_save_node(node):
        prefix = node.get('inputs', {}).get('filename_prefix')
        return node.get('class_type') == 'SaveImage' and prefix == 'API_'

    def _walk(node):
        nonlocal save_node_key
        if not isinstance(node, dict):
            return
        for key, value in node.items():
            # Descend first; containers are mutated in place.
            if isinstance(value, dict):
                if _is_api_save_node(value):
                    save_node_key = key
                _walk(value)
            elif isinstance(value, list):
                for item in value:
                    _walk(item)

            # Then apply the substitutions to this key/value pair itself.
            if value == "API_PPrompt":
                node[key] = post.get(value, "") + positive
            elif value in ["API_SPrompt", "API_NPrompt"]:
                node[key] = post.get(value, "")
            elif key in seed_keys:
                node[key] = random.randint(1000000000000, 9999999999999)
            elif key in dimension_keys:
                DEBUG(f"Got a hit for a dimension: {key} {value}")
                if value == 1023:
                    node[key] = post.get("width", 1024)
                    DEBUG(f"Set {key} to {node[key]}.")
                elif value == 1025:
                    node[key] = post.get("height", 1024)
                    DEBUG(f"Set {key} to {node[key]}.")

    _walk(workflow)
    return save_node_key
async def queue_prompt(workflow_data):
    """Submit a workflow to ComfyUI and return the id of the queued prompt.

    ``ensure_comfy`` is awaited first; the workflow is then POSTed as
    ``{"prompt": workflow_data}`` to ``{COMFYUI_URL}/prompt``.

    Returns the ``prompt_id`` field of the JSON reply (None if absent).
    Raises Exception when the reply status is not 200.
    """
    await ensure_comfy()
    async with ClientSession() as http:
        endpoint = f"{COMFYUI_URL}/prompt"
        body = {"prompt": workflow_data}
        async with http.post(endpoint, json=body) as reply:
            if reply.status != 200:
                raise Exception(f"Failed to queue prompt. Status code: {reply.status}")
            payload = await reply.json()
            return payload.get('prompt_id')

73
sijapi/routers/serve.py Normal file
View file

@ -0,0 +1,73 @@
'''
Web server module. Used by other modules when serving static content is required, e.g. the sd image generation module. Also used to serve PUBLIC_KEY.
'''
import os
from fastapi import APIRouter, Form, HTTPException, Request, Response
from fastapi.responses import FileResponse, PlainTextResponse
from pathlib import Path
from datetime import datetime
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pathlib import Path
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
from sijapi import DATA_DIR, SD_IMAGE_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
# Router that the endpoints of this module are registered on; tagged "public".
serve = APIRouter(tags=["public"])
@serve.get("/pgp")
async def get_pgp():
    """Return the configured PUBLIC_KEY as a ``text/plain`` response."""
    key_response = Response(content=PUBLIC_KEY, media_type="text/plain")
    return key_response
@serve.get("/img/{image_name}")
def serve_image(image_name: str):
    """Serve a file from SD_IMAGE_DIR by name.

    The requested name is joined onto SD_IMAGE_DIR and normalised. The file
    is only served when the result is a regular file located inside
    SD_IMAGE_DIR, so directory names and names such as ``..`` are treated
    as "not found" instead of being handed to ``FileResponse``.

    Returns a ``FileResponse`` on success, otherwise the dictionary
    ``{"error": "Image not found"}``.
    NOTE(review): the not-found case is still answered with the default
    success status; kept unchanged for existing clients.
    """
    image_root = os.path.abspath(str(SD_IMAGE_DIR))
    image_path = os.path.abspath(os.path.join(image_root, image_name))
    try:
        inside_root = os.path.commonpath([image_root, image_path]) == image_root
    except ValueError:
        # commonpath refuses paths that cannot share a prefix (e.g. different drives).
        inside_root = False

    if inside_root and os.path.isfile(image_path):
        return FileResponse(image_path)
    return {"error": "Image not found"}
def construct_journal_path(date_str: str) -> Path:
    """Build the path of the journal note for a ``YYYY-MM-DD`` date string.

    The result lies under ``OBSIDIAN_VAULT_DIR`` and has the shape
    ``journal/<year>/<year-month monthname>/<date weekday>/<date weekday>.md``.

    Raises:
        HTTPException: status 400 when ``date_str`` cannot be parsed.
    """
    try:
        day = datetime.strptime(date_str, '%Y-%m-%d')
        year_dir = day.strftime('%Y')
        month_dir = day.strftime('%Y-%m %B')
        day_name = day.strftime('%Y-%m-%d %A')
        return OBSIDIAN_VAULT_DIR / f'journal/{year_dir}/{month_dir}/{day_name}/{day_name}.md'
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid date format")
def is_valid_date(date_str: str) -> bool:
    """Tell whether ``date_str`` parses with the format ``%Y-%m-%d``."""
    try:
        datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        return False
    return True
@serve.get("/notes/{file_path:path}")
async def get_file(file_path: str):
    """Serve a note or image from the Obsidian vault.

    A ``file_path`` that is a valid ``YYYY-MM-DD`` date is resolved through
    ``assemble_journal_path``. Any other value is interpreted relative to
    ``OBSIDIAN_VAULT_DIR``; a path without suffix gets ``.md`` appended.

    Markdown files are returned as plain text, ``.png``/``.jpg``/``.jpeg``
    files as file responses.

    Raises:
        HTTPException: 404 when the file does not exist or the requested
            path points outside the vault, 400 for unsupported file types,
            500 when a markdown file cannot be read.
    """
    if is_valid_date(file_path):
        absolute_path, _ = assemble_journal_path(file_path, no_timestamp = True)
    else:
        # file_path comes straight from the URL: normalise it lexically and
        # refuse anything ("..", absolute paths) that leaves the vault.
        vault_root = Path(os.path.abspath(OBSIDIAN_VAULT_DIR))
        absolute_path = Path(os.path.abspath(OBSIDIAN_VAULT_DIR / file_path))
        try:
            absolute_path.relative_to(vault_root)
        except ValueError:
            raise HTTPException(status_code=404, detail="File not found") from None
        if not absolute_path.suffix:
            absolute_path = absolute_path.with_suffix(".md")

    if not absolute_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")

    if absolute_path.suffix == '.md':
        try:
            with open(absolute_path, 'r', encoding='utf-8') as file:
                content = file.read()
            return PlainTextResponse(content)
        except Exception:
            raise HTTPException(status_code=500, detail="Internal Server Error")
    elif absolute_path.suffix in ['.png', '.jpg', '.jpeg']:
        return FileResponse(absolute_path)
    else:
        raise HTTPException(status_code=400, detail="Unsupported file type")

211
sijapi/routers/summarize.py Normal file
View file

@ -0,0 +1,211 @@
from fastapi import APIRouter, BackgroundTasks, File, Form, HTTPException, UploadFile
from fastapi.responses import FileResponse
from pathlib import Path
import tempfile
import filetype
import shutil
import os
import re
from os.path import basename, splitext
from datetime import datetime
from typing import Optional, Union, List
from PyPDF2 import PdfReader
from pdfminer.high_level import extract_text as pdfminer_extract_text
import pytesseract
from pdf2image import convert_from_path
import asyncio
import html2text
import markdown
from ollama import Client, AsyncClient
from docx import Document
from sijapi.routers.tts import generate_speech
from sijapi.routers.asr import transcribe_audio
from sijapi.utilities import sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension, f
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import DEFAULT_VOICE, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
# Router that the summarization endpoints below are registered on; tagged "trusted" and "private".
summarize = APIRouter(tags=["trusted", "private"])
@summarize.get("/summarize")
async def summarize_get(text: str = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)):
    """Summarize the submitted ``text`` following ``instruction``.

    Both values are read as form fields; the result of ``summarize_text``
    is returned unchanged.
    """
    return await summarize_text(text, instruction)
@summarize.post("/summarize")
async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)):
    """Summarize either the submitted ``text`` or the content of ``file``.

    A non-empty ``text`` takes precedence; otherwise the text is extracted
    from the uploaded file with ``extract_text``.

    Raises:
        HTTPException: status 400 when neither ``text`` nor ``file`` is
            supplied (previously this surfaced as an unhandled ValueError).
    """
    if text:
        text_content = text
    elif file is not None:
        text_content = await extract_text(file)
    else:
        raise HTTPException(status_code=400, detail="Either 'text' or 'file' must be provided")
    return await summarize_text(text_content, instruction)
@summarize.post("/speaksummary")
async def summarize_tts_endpoint(background_tasks: BackgroundTasks, instruction: str = Form(SUMMARY_INSTRUCT), file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), voice: Optional[str] = Form(DEFAULT_VOICE), speed: Optional[float] = Form(1.2), podcast: Union[bool, str] = Form(False)):
    """Summarize text or an uploaded file and return the summary as speech.

    A non-empty ``text`` takes precedence over ``file``. The summary is
    produced and spoken by ``summarize_tts``; the resulting audio file is
    returned as a ``FileResponse`` with media type ``audio/wav``.

    Raises:
        HTTPException: status 400 when neither ``text`` nor ``file`` is
            supplied.
    """
    podcast = str_to_bool(str(podcast))  # Proper boolean conversion
    if text:
        text_content = text
    elif file is not None:
        # extract_text is a coroutine function: without the await a coroutine
        # object, not the extracted text, would be passed on.
        text_content = await extract_text(file)
    else:
        raise HTTPException(status_code=400, detail="Either 'text' or 'file' must be provided")
    final_output_path = await summarize_tts(text_content, instruction, voice, speed, podcast)
    return FileResponse(path=final_output_path, filename=os.path.basename(final_output_path), media_type='audio/wav')
async def summarize_tts(
    text: str,
    instruction: str = SUMMARY_INSTRUCT,
    voice: Optional[str] = DEFAULT_VOICE,
    speed: float = 1.1,
    podcast: bool = False,
    LLM: AsyncClient = None
):
    """Summarize ``text`` and render the summary to speech.

    The summary is produced with ``summarize_text``. A short title is then
    requested from the same LLM client, sanitized, cut to at most five
    words and prefixed with a timestamp to form the ``.wav`` title passed
    to ``generate_speech``.

    Args:
        text: Text to summarize.
        instruction: Instruction passed to the summarizer.
        voice: Voice passed to ``generate_speech``.
        speed: Speed passed to ``generate_speech``.
        podcast: Forwarded to ``generate_speech``.
        LLM: Client to use; a new ``AsyncClient`` is created when omitted.

    Returns:
        Whatever ``generate_speech`` returns (logged as the final output path).
    """
    LLM = LLM if LLM else AsyncClient()
    summarized_text = await summarize_text(text, instruction, LLM=LLM)
    # Reuse the same client for the title instead of silently creating another one.
    filename = await summarize_text(summarized_text, "Provide a title for this summary no longer than 4 words", LLM=LLM)
    filename = sanitize_filename(filename)
    filename = ' '.join(filename.split()[:5])
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{timestamp}{filename}.wav"
    # NOTE(review): this BackgroundTasks object is created locally and is not
    # attached to any response in this function — confirm its tasks ever run.
    background_tasks = BackgroundTasks()
    final_output_path = await generate_speech(background_tasks, summarized_text, voice, "xtts", speed=speed, podcast=podcast, title=filename)
    DEBUG(f"summary_tts completed with final_output_path: {final_output_path}")
    return final_output_path
async def get_title(text: str, LLM: AsyncClient = None):
    """Ask the LLM for a title for ``text`` and return it sanitized.

    The request is sent through ``process_chunk`` as a single part with a
    requested maximum of 12 tokens; the answer is passed through
    ``sanitize_filename``.

    Args:
        text: Text to generate a title for.
        LLM: Client to use; a new ``AsyncClient`` is created when omitted.
    """
    # The annotation names the class; the previous ``AsyncClient()`` built a
    # throw-away client instance every time the function was defined.
    LLM = LLM if LLM else AsyncClient()
    title = await process_chunk("Generate a title for this text", text, 1, 1, 12, LLM)
    title = sanitize_filename(title)
    return title
def split_text_into_chunks(text: str) -> List[str]:
    """
    Splits the given text into manageable chunks based on predefined size and overlap.

    Sizes are configured in tokens (SUMMARY_CHUNK_SIZE, SUMMARY_CHUNK_OVERLAP)
    and converted to word counts by dividing by SUMMARY_TPW. Consecutive
    chunks share ``overlap`` words. An empty text yields an empty list.
    """
    words = text.split()
    adjusted_chunk_size = max(1, int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW))  # Ensure at least 1
    adjusted_overlap = max(0, int(SUMMARY_CHUNK_OVERLAP / SUMMARY_TPW))  # Ensure non-negative
    # An overlap >= chunk size would give range() a zero step (ValueError) or a
    # negative one (no chunks at all); always advance by at least one word.
    step = max(1, adjusted_chunk_size - adjusted_overlap)
    chunks = []
    for i in range(0, len(words), step):
        DEBUG(f"We are on iteration # {i} if split_text_into_chunks.")
        chunk = ' '.join(words[i:i + adjusted_chunk_size])
        chunks.append(chunk)
    return chunks
def calculate_max_tokens(text: str) -> int:
    """Return a quarter of the estimated token count, capped at SUMMARY_CHUNK_SIZE.

    The token count is estimated as words * SUMMARY_TPW, with a floor of 1.
    """
    word_total = len(text.split())
    estimated_tokens = max(1, int(word_total * SUMMARY_TPW))  # Ensure at least 1
    quarter = estimated_tokens // 4
    return min(quarter, SUMMARY_CHUNK_SIZE)
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], background_tasks: BackgroundTasks = None) -> str:
    """Extract text from an upload, raw bytes, or a file path.

    Uploads and raw bytes are first written to a temporary file. The
    extraction method is chosen from the file extension:

    - ``.pdf``: ``extract_text_from_pdf``
    - ``.wav``/``.m4a``/``.m4v``/``.mp3``/``.mp4``: ``transcribe_audio``
    - ``.md``: file content passed through ``markdown.markdown``
    - ``.html``: file content passed through ``html2text.html2text``
    - ``.txt``/``.csv``/``.json``: file content as is
    - ``.docx``: ``extract_text_from_docx``

    Any other extension yields an empty string. Note that raw bytes are
    stored without extension and therefore always take that path.

    Args:
        file: The source to read.
        background_tasks: When given, removal of the temporary file is
            scheduled on it instead of being done immediately.

    Raises:
        ValueError: If ``file`` is of an unsupported type.
    """
    temp_file_path = None
    try:
        if isinstance(file, UploadFile):
            # mkstemp creates the file atomically, unlike the race-prone mktemp.
            fd, temp_file_path = tempfile.mkstemp(suffix=get_extension(file))
            with os.fdopen(fd, 'wb') as buffer:
                shutil.copyfileobj(file.file, buffer)
            file_path = temp_file_path
        elif isinstance(file, (bytes, bytearray)):
            fd, temp_file_path = tempfile.mkstemp()
            with os.fdopen(fd, 'wb') as buffer:
                buffer.write(file)
            file_path = temp_file_path
        elif isinstance(file, (str, Path)):
            file_path = str(file)
        else:
            raise ValueError("Unsupported file type")

        _, file_ext = os.path.splitext(file_path)
        file_ext = file_ext.lower()
        text_content = ""
        if file_ext == '.pdf':
            text_content = await extract_text_from_pdf(file_path)
        elif file_ext in ['.wav', '.m4a', '.m4v', '.mp3', '.mp4']:
            text_content = await transcribe_audio(file_path=file_path)
        elif file_ext == '.md':
            text_content = await read_text_file(file_path)
            text_content = markdown.markdown(text_content)
        elif file_ext == '.html':
            text_content = await read_text_file(file_path)
            text_content = html2text.html2text(text_content)
        elif file_ext in ['.txt', '.csv', '.json']:
            text_content = await read_text_file(file_path)
        elif file_ext == '.docx':
            text_content = await extract_text_from_docx(file_path)
        return text_content
    finally:
        # Clean up the temporary copy even when extraction failed.
        if temp_file_path is not None:
            if background_tasks:
                background_tasks.add_task(os.remove, temp_file_path)
            else:
                os.remove(temp_file_path)
async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_override: int = None, length_quotient: float = SUMMARY_LENGTH_RATIO, LLM: AsyncClient = None):
    """
    Process the given text: split into chunks, summarize each chunk, and
    potentially summarize the concatenated summary for long texts.

    Args:
        text: Text to summarize.
        instruction: Instruction placed in front of every chunk.
        length_override: Total summary length in tokens; when falsy the
            length is the estimated token count divided by ``length_quotient``.
        length_quotient: Divisor applied to the estimated token count.
        LLM: Client to use; a new ``AsyncClient`` is created when omitted.

    Returns:
        The summary text. When the text was split into more than one chunk,
        the joined chunk summaries are summarized once more.
    """
    LLM = LLM if LLM else AsyncClient()
    chunked_text = split_text_into_chunks(text)
    total_parts = max(1, len(chunked_text))  # Ensure at least 1
    total_words_count = len(text.split())
    total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW))  # Ensure at least 1
    total_summary_length = length_override if length_override else total_tokens_count // length_quotient
    corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
    # length_quotient may be a float, which makes the floor division a float as
    # well; the per-chunk token budget is kept an integer.
    individual_summary_length = max(1, int(corrected_total_summary_length // total_parts))  # Ensure at least 1
    DEBUG(f"Text split into {total_parts} chunks.")
    summaries = await asyncio.gather(*[
        process_chunk(instruction, chunk, i+1, total_parts, individual_summary_length, LLM) for i, chunk in enumerate(chunked_text)
    ])
    concatenated_summary = ' '.join(summaries)
    if total_parts > 1:
        # Reuse the caller's client for the consolidation pass as well.
        concatenated_summary = await process_chunk(instruction, concatenated_summary, 1, 1, LLM=LLM)
    return concatenated_summary
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, max_tokens: Optional[int] = None, LLM: AsyncClient = None) -> str:
    """
    Send one portion of text to the LLM and return the generated text.

    When ``max_tokens`` is not given it is derived from the estimated token
    count of ``text`` divided by SUMMARY_LENGTH_RATIO, capped at
    SUMMARY_CHUNK_SIZE divided by the number of parts. In every case the
    value is raised to at least SUMMARY_MIN_LENGTH.

    The prompt mentions the part number only when there is more than one part.
    """
    if not LLM:
        LLM = AsyncClient()

    word_total = max(1, len(text.split()))  # Ensure at least 1
    token_estimate = max(1, int(word_total * SUMMARY_TPW))  # Ensure at least 1
    token_share = max(1, token_estimate // SUMMARY_LENGTH_RATIO)  # Ensure at least 1
    if max_tokens is None:
        per_part_cap = SUMMARY_CHUNK_SIZE // max(1, total_parts)
        max_tokens = min(token_share, per_part_cap)
    # Ensure a minimum token count to avoid tiny processing chunks
    max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH)
    DEBUG(f"Summarizing part {part} of {total_parts}: Max_tokens: {max_tokens}")

    multi_part = part and total_parts > 1
    if multi_part:
        prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
    else:
        prompt = f"{instruction}:\n\n{text}"

    DEBUG(f"Starting LLM.generate for part {part} of {total_parts}")
    generation_options = {'num_predict': max_tokens, 'temperature': 0.6}
    response = await LLM.generate(
        model=SUMMARY_MODEL,
        prompt=prompt,
        stream=False,
        options=generation_options
    )
    generated = response['response']
    DEBUG(f"Completed LLM.generate for part {part} of {total_parts}")
    return generated
async def title_and_summary(extracted_text: str):
    """Return a ``(title, summary)`` pair for ``extracted_text``.

    Only the text after the last newline / carriage return of the generated
    title is kept, and it is sanitized again before being returned.
    """
    raw_title = await get_title(extracted_text)
    last_line = raw_title.split("\n")[-1].split("\r")[-1]
    clean_title = sanitize_filename(last_line)
    summary_text = await summarize_text(extracted_text)
    return clean_title, summary_text

577
sijapi/routers/time.py Normal file
View file

@ -0,0 +1,577 @@
import tempfile
import os
import json
import requests
import csv
import subprocess
import asyncio
import httpx
import io
import re
import pytz
import httpx
import sqlite3
import math
from httpx import Timeout
from fastapi import APIRouter, UploadFile, File, Response, Header, Query, Depends, FastAPI, Request, HTTPException, status
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel, Field
from datetime import datetime, timedelta
from sijapi.utilities import localize_dt
from decimal import Decimal, ROUND_UP
from typing import Optional, List, Dict, Union, Tuple
from collections import defaultdict
from dotenv import load_dotenv
from traceback import format_exc
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOME_DIR, TIMING_API_KEY, TIMING_API_URL
### INITIALIZATIONS ###
# Router for the endpoints below. NOTE: inside this module the name `time`
# refers to this router, not to the standard-library `time` module.
time = APIRouter(tags=["private"])
########################
#### INITIALIZATION ####
########################
# Directory containing this source file.
script_directory = os.path.dirname(os.path.abspath(__file__))
# Configuration constants
# Time zone that all entries are converted to before grouping by day.
pacific = pytz.timezone('America/Los_Angeles')
# Matches a run of emoji characters followed by one space at the start of a
# string; used to strip emoji prefixes from project titles when sorting.
emoji_pattern = re.compile(r'^[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F700-\U0001F77F\U0001F780-\U0001F7FF\U0001F800-\U0001F8FF\U0001F900-\U0001F9FF\U0001FA00-\U0001FA6F\U0001FA70-\U0001FAFF\U00002702-\U000027B0\U000024C2-\U0001F251]+ ')
# httpx timeout settings (seconds).
# NOTE(review): not referenced by the code visible in this part of the file — confirm it is used.
timeout = Timeout(connect=30, read=600, write=120, pool=5)
# Define your models
class TimingRequest(BaseModel):
    """Request body describing a date range and an output format."""

    # Dates are expected as YYYY-MM-DD. The patterns are anchored at both
    # ends so that surrounding text is rejected instead of being accepted
    # because a date-like fragment occurs somewhere in the value.
    start_date: str = Field(..., pattern=r"^\d{4}-\d{2}-\d{2}$")
    end_date: Optional[str] = Field(None, pattern=r"^\d{4}-\d{2}-\d{2}$")
    # Name of the desired output format; defaults to 'json'.
    output_format: Optional[str] = 'json'
####################
#### TIMING API ####
####################
@time.post("/time/post")
async def post_time_entry_to_timing(entry: Dict):
    """Forward a time entry to the Timing web API and return its JSON reply.

    Raises:
        HTTPException: with the upstream status code and body when the API
            answers with an error status; with status 500 for any other
            failure while sending the request.
    """
    endpoint = 'https://web.timingapp.com/api/v1/time-entries'
    request_headers = {
        'Authorization': f'Bearer {TIMING_API_KEY}',
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'X-Time-Zone': 'America/Los_Angeles'
    }
    DEBUG(f"Received entry: {entry}")

    reply = None  # stays None if the request never completes
    try:
        async with httpx.AsyncClient() as http:
            reply = await http.post(endpoint, headers=request_headers, json=entry)
            reply.raise_for_status()  # This will only raise for 4xx and 5xx responses
    except httpx.HTTPStatusError as exc:
        upstream = exc.response
        DEBUG(f"HTTPStatusError caught: Status code: {upstream.status_code}, Detail: {upstream.text}")
        raise HTTPException(status_code=upstream.status_code, detail=str(upstream.text))
    except Exception as exc:
        DEBUG(f"General exception caught: {exc}")
        raise HTTPException(status_code=500, detail="An unexpected error occurred")

    if not reply:
        # Handle the case where the response was not set due to an error.
        raise HTTPException(status_code=500, detail="Failed to make the external API request")
    return reply.json()
def project_sort_key(project):
    """Return the project title without leading emoji, for use as a sort key."""
    return re.sub(emoji_pattern, '', project)
def prepare_date_range_for_query(start_date, end_date=None):
    """Turn ``YYYY-MM-DD`` dates into a ``(start, end)`` timestamp pair.

    The start is midnight of the day *before* ``start_date``; the end is
    23:59:59 of ``end_date``, or of ``start_date`` when no end is given.
    """
    day_before = datetime.strptime(start_date, "%Y-%m-%d") - timedelta(days=1)
    range_start = f"{day_before.strftime('%Y-%m-%d')}T00:00:00"
    last_day = end_date or start_date
    range_end = f"{last_day}T23:59:59"
    return range_start, range_end
def truncate_project_title(title):
    """Return the part of ``title`` before the first ``' - '`` separator.

    A title without the separator is returned unchanged.
    """
    head, _, _ = title.partition(' - ')
    return head
async def fetch_and_prepare_timing_data(start: datetime, end: Optional[datetime] = None) -> List[Dict]:
    """Fetch time entries from the Timing API and split them at midnight.

    The query window runs from midnight of the day before ``start`` to
    23:59:59 of ``end`` (or of ``start`` when ``end`` is not given).
    Each returned entry is converted to the Pacific time zone; an entry
    that crosses midnight is cut into one piece per calendar day. Pieces
    that begin before the date of ``start`` are dropped.

    Returns:
        A list of dictionaries as built by ``create_time_entry``.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    one_day = timedelta(days=1)

    # Adjust the start date to include the day before and format the end date
    window_start = (start - one_day).strftime("%Y-%m-%dT00:00:00")
    last_day = end if end else start
    window_end = f"{last_day.strftime('%Y-%m-%d')}T23:59:59"

    # Fetch timing data from the API using TIMING_API_KEY
    url = f"{TIMING_API_URL}/time-entries?start_date_min={window_start}&start_date_max={window_end}&include_project_data=1"
    headers = {
        'Authorization': f'Bearer {TIMING_API_KEY}',
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'X-Time-Zone': 'America/Los_Angeles'
    }

    pieces = []
    async with httpx.AsyncClient() as client:
        response = await client.get(url, headers=headers)
        if response.status_code != 200:
            response.raise_for_status()
        fetched_entries = response.json().get('data', [])

    for entry in fetched_entries:
        cursor = datetime.strptime(entry['start_date'], stamp_format).astimezone(pacific)
        finish = datetime.strptime(entry['end_date'], stamp_format).astimezone(pacific)

        # Emit one piece per day until the cursor reaches the entry's last day.
        while cursor.date() < finish.date():
            next_midnight = pacific.localize(datetime.combine(cursor.date() + one_day, datetime.min.time()))
            seconds_to_midnight = (next_midnight - cursor).total_seconds()
            if cursor.date() >= start.date():
                pieces.append(create_time_entry(entry, cursor, next_midnight, seconds_to_midnight))
            cursor = next_midnight

        # Remaining part on the entry's last day.
        if cursor.date() >= start.date():
            seconds_left = (finish - cursor).total_seconds()
            pieces.append(create_time_entry(entry, cursor, finish, seconds_left))
    return pieces
def format_duration(duration):
    """Convert seconds to hours, rounded up to one decimal place, as a string."""
    seconds_per_hour = Decimal(3600)
    hours = Decimal(duration) / seconds_per_hour
    return str(hours.quantize(Decimal('0.1'), rounding=ROUND_UP))
def create_time_entry(original_entry, start_time, end_time, duration_seconds):
    """Formats a time entry, preserving key details and adding necessary elements.

    Start and end are converted to the Pacific time zone and written both
    as full timestamps and as plain dates; the duration is formatted with
    ``format_duration``. Missing notes, title, running flag and project
    data are replaced by defaults.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    day_format = '%Y-%m-%d'
    local_start = start_time.astimezone(pacific)
    local_end = end_time.astimezone(pacific)

    # A missing or empty project falls back to the defaults.
    project = original_entry.get('project')
    if project:
        project_title = project.get('title', 'No Project')
        project_color = project.get('color', '#FFFFFF')  # Default color
    else:
        project_title, project_color = 'No Project', '#FFFFFF'

    return {
        'start_time': local_start.strftime(stamp_format),
        'end_time': local_end.strftime(stamp_format),
        'start_date': local_start.strftime(day_format),
        'end_date': local_end.strftime(day_format),
        'duration': format_duration(duration_seconds),
        'notes': original_entry.get('notes', ''),
        'title': original_entry.get('title', 'Untitled'),
        'is_running': original_entry.get('is_running', False),
        'project': {
            'title': project_title,
            'color': project_color,
        },
    }
# TIMELINE
@time.get("/time/line")
async def get_timing_timeline(
    request: Request,
    start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
    end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
    """Return the time entries of a date range as a markdown timeline.

    Args:
        request: The incoming request (unused).
        start_date: First day of the range, ``YYYY-MM-DD``.
        end_date: Last day of the range; defaults to ``start_date``.

    Raises:
        HTTPException: status 400 when a date cannot be parsed.
    """
    # fetch_and_prepare_timing_data does datetime arithmetic on its
    # arguments, so the query strings must be parsed before being passed on.
    try:
        start = datetime.strptime(start_date, "%Y-%m-%d")
        end = datetime.strptime(end_date, "%Y-%m-%d") if end_date else None
    except ValueError:
        raise HTTPException(status_code=400, detail="Dates must be valid and formatted as YYYY-MM-DD")

    queried_start_date = start.date()
    queried_end_date = end.date() if end else queried_start_date

    # Fetch and process timing data
    timing_data = await fetch_and_prepare_timing_data(start, end)
    # Process timeline data
    timeline_formatted_data = process_timeline(timing_data, queried_start_date, queried_end_date)
    return Response(content=timeline_formatted_data, media_type="text/markdown")
def process_timeline(timing_data, queried_start_date, queried_end_date):
    """Render prepared time entries as a chronological markdown list.

    Entries whose Pacific start date lies within the queried range are
    grouped by day and sorted by start time. A day heading with the day's
    total is only written when the range spans more than one day.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    by_day = defaultdict(list)
    for entry in timing_data:
        # Convert start and end times to datetime objects in the Pacific timezone
        began = datetime.strptime(entry['start_time'], stamp_format).astimezone(pacific)
        ended = datetime.strptime(entry['end_time'], stamp_format).astimezone(pacific)
        project_title = truncate_project_title(entry['project']['title']) if entry.get('project') else 'No Project'
        task_title = entry['title'] if entry.get('title') else 'Untitled'
        # Skip entries that start outside the queried date range
        if not (queried_start_date <= began.date() <= queried_end_date):
            continue
        hours = format_duration((ended - began).total_seconds())
        row = (began.strftime('%H:%M:%S'), project_title, task_title, hours)
        by_day[began.date()].append(row)

    lines = []
    for day, rows in sorted(by_day.items()):
        ordered_rows = sorted(rows, key=lambda row: row[0])
        day_total = sum(Decimal(row[3]) for row in ordered_rows)
        if queried_start_date != queried_end_date:
            lines.append(f"## {day.strftime('%Y-%m-%d')} {day.strftime('%A')} [{day_total}]\n")
        for clock, project, task, hours in ordered_rows:
            lines.append(f" - {clock} {project} - {task} [{hours}]")
    return "\n".join(lines)
# CSV
@time.get("/time/csv")
async def get_timing_csv(
    request: Request,
    start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
    end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
    """Return the time entries of a date range as pipe-delimited CSV.

    Args:
        request: The incoming request (unused).
        start_date: First day of the range, ``YYYY-MM-DD``.
        end_date: Last day of the range; defaults to ``start_date``.

    Raises:
        HTTPException: status 400 when a date cannot be parsed.
    """
    # fetch_and_prepare_timing_data does datetime arithmetic on its
    # arguments, so the query strings must be parsed before being passed on.
    try:
        start = datetime.strptime(start_date, "%Y-%m-%d")
        end = datetime.strptime(end_date, "%Y-%m-%d") if end_date else None
    except ValueError:
        raise HTTPException(status_code=400, detail="Dates must be valid and formatted as YYYY-MM-DD")

    # Fetch and process timing data
    timing_data = await fetch_and_prepare_timing_data(start, end)

    queried_start_date = start.date()
    queried_end_date = end.date() if end else queried_start_date

    # Process CSV data
    csv_data = process_csv(timing_data, queried_start_date, queried_end_date)
    if not csv_data or csv_data.strip() == "":
        return Response(content="No CSV data available for the specified date range.", media_type="text/plain")
    return Response(content=csv_data, media_type="text/csv")
def process_csv(timing_data, queried_start_date, queried_end_date):
    """Render prepared time entries as pipe-delimited CSV text.

    Entries whose Pacific start date lies within the queried range are
    grouped by day and project. For every project one row lists its tasks
    with their summed hours; each day ends with a "Day Total" row followed
    by an empty row. The "Notes" column is always left empty.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    one_decimal = Decimal('0.1')

    grouped = defaultdict(lambda: defaultdict(list))
    for entry in timing_data:
        # Convert start and end times to datetime objects in the Pacific timezone
        began = datetime.strptime(entry['start_time'], stamp_format).astimezone(pacific)
        ended = datetime.strptime(entry['end_time'], stamp_format).astimezone(pacific)
        # Skip entries that start outside the queried date range
        if not (queried_start_date <= began.date() <= queried_end_date):
            continue
        hours = format_duration((ended - began).total_seconds())  # Convert duration to hours
        project_title = truncate_project_title(entry['project']['title']) if 'title' in entry['project'] else 'No Project'
        task_title = entry['title'] if entry.get('title') else 'Untitled'
        grouped[began.date()][project_title].append((task_title, hours))

    buffer = io.StringIO()
    writer = csv.writer(buffer, delimiter='|', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['Date', 'Project', 'Task', 'Notes', 'Duration'])
    for day, by_project in sorted(grouped.items()):
        day_total = Decimal(0)
        day_label = day.strftime('%Y-%m-%d %a')
        for project, tasks in sorted(by_project.items(), key=lambda pair: project_sort_key(pair[0])):
            per_task = defaultdict(Decimal)
            for task, duration in tasks:
                per_task[task] += Decimal(duration)
            project_total = sum(per_task.values()).quantize(one_decimal)
            day_total += project_total
            task_cells = [
                f"{task.replace(';', ',')} [{str(per_task[task].quantize(one_decimal))}]"
                for task in per_task
            ]
            writer.writerow([day_label, project, "; ".join(task_cells), '', str(project_total.quantize(one_decimal))])
        writer.writerow([day_label, 'Day Total', '', '', str(day_total.quantize(one_decimal))])
        writer.writerow(['', '', '', '', ''])
    return buffer.getvalue()
# MARKDOWN
@time.get("/time/markdown3")
async def get_timing_markdown3(
    request: Request,
    start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
    end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
    """Return the time entries of a date range as a markdown summary.

    The query strings are converted with ``localize_dt``; ``end_date``
    defaults to ``start_date``. The fetched entries are rendered by
    ``process_timing_markdown3``.
    """
    # Fetch and process timing data
    range_start = localize_dt(start_date)
    range_end = localize_dt(end_date) if end_date else None
    entries = await fetch_and_prepare_timing_data(range_start, range_end)

    # Retain these for processing Markdown data with the correct timezone
    queried_start_date = range_start.replace(tzinfo=pacific).date()
    if range_end:
        queried_end_date = range_end.replace(tzinfo=pacific).date()
    else:
        queried_end_date = queried_start_date

    # Process Markdown data
    rendered = process_timing_markdown3(entries, queried_start_date, queried_end_date)
    return Response(content=rendered, media_type="text/markdown")
def process_timing_markdown3(timing_data, queried_start_date, queried_end_date):
    """Render timing entries as a Markdown bullet list grouped by day and project.

    Args:
        timing_data: Iterable of entry dicts. Each entry is read for
            ``start_time`` / ``end_time`` (``%Y-%m-%dT%H:%M:%S.%f%z`` strings),
            ``project`` (a mapping that may hold ``title``) and ``title``.
        queried_start_date: First date (inclusive) to include.
        queried_end_date: Last date (inclusive) to include.

    Returns:
        The Markdown text. Each day contributes one bullet per project followed
        by a blank line; a ``##`` day heading is added only when the queried
        range spans more than one date.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    tenth = Decimal('0.1')

    # day -> project title -> [(task title, formatted hours), ...]
    by_day = defaultdict(lambda: defaultdict(list))
    for entry in timing_data:
        began = datetime.strptime(entry['start_time'], timestamp_format).astimezone(pacific)
        finished = datetime.strptime(entry['end_time'], timestamp_format).astimezone(pacific)
        day = began.date()
        # Entries are bucketed (and filtered) by the day on which they started.
        if not (queried_start_date <= day <= queried_end_date):
            continue
        hours = format_duration((finished - began).total_seconds())
        if 'title' in entry['project']:
            project_name = truncate_project_title(entry['project']['title'])
        else:
            project_name = 'No Project'
        task_name = entry['title'] if entry.get('title') else 'Untitled'
        by_day[day][project_name].append((task_name, hours))

    lines = []
    for day, projects in sorted(by_day.items()):
        day_total = Decimal(0)
        bullets = []
        ordered_projects = sorted(projects.items(), key=lambda pair: project_sort_key(pair[0]))
        for project_name, logged in ordered_projects:
            # Collapse repeated task titles into one total per title.
            per_task = defaultdict(Decimal)
            for task_name, hours in logged:
                per_task[task_name] += Decimal(hours)
            project_total = sum(per_task.values()).quantize(tenth)
            day_total += project_total
            # ';' separates tasks in the output, so it is replaced inside titles.
            task_list = "; ".join(
                [f"{name.replace(';', ',')} [{spent}]" for name, spent in per_task.items()]
            )
            bullets.append(f"- {project_name} - {task_list} - *{project_total}*.")
        if queried_start_date != queried_end_date:
            lines.append(f"## {day.strftime('%Y-%m-%d %A')} [{day_total}]\n")
        lines.extend(bullets)
        lines.append("")
    return "\n".join(lines)
@time.get("/time/markdown")
async def get_timing_markdown(
    request: Request,
    start: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
    end: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
    """Serve the timing entries for a date range as Markdown tables.

    Args:
        request: The incoming request (not read by this handler).
        start: First day of the range, ``YYYY-MM-DD``.
        end: Optional last day of the range, ``YYYY-MM-DD``.

    Returns:
        A ``text/markdown`` response built by ``process_timing_markdown``.
    """
    start_date = localize_dt(start)
    # `end` is optional, so only localize it when it was supplied (the
    # /time/markdown3 handler guards the same way). process_timing_markdown
    # already treats a missing end date as a single-day query.
    end_date = localize_dt(end) if end else None
    markdown_formatted_data = await process_timing_markdown(start_date, end_date)
    return Response(content=markdown_formatted_data, media_type="text/markdown")
async def process_timing_markdown(start_date: datetime, end_date: datetime):
    """Fetch timing entries and render them as one Markdown table per day.

    Args:
        start_date: Start of the queried range.
        end_date: End of the queried range; a falsy value limits the report to
            the start date.

    Returns:
        The Markdown text: for each day a header row, one row per project, a
        TOTAL row and a blank line. A ``##`` day heading is added only when
        the queried range spans more than one date.
    """
    entries = await fetch_and_prepare_timing_data(start_date, end_date)
    first_day = start_date.replace(tzinfo=pacific).date()
    last_day = end_date.replace(tzinfo=pacific).date() if end_date else first_day

    timestamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    tenth = Decimal('0.1')
    table_header = "|Project|Task(s)|Duration|\n|-------|-------|-------:|"

    # day -> project title -> [(task title, formatted hours), ...]
    by_day = defaultdict(lambda: defaultdict(list))
    for entry in entries:
        began = datetime.strptime(entry['start_time'], timestamp_format).astimezone(pacific)
        finished = datetime.strptime(entry['end_time'], timestamp_format).astimezone(pacific)
        day = began.date()
        # Entries are bucketed (and filtered) by the day on which they started.
        if not (first_day <= day <= last_day):
            continue
        hours = format_duration((finished - began).total_seconds())
        if 'title' in entry['project']:
            project_name = truncate_project_title(entry['project']['title'])
        else:
            project_name = 'No Project'
        task_name = entry['title'] if entry.get('title') else 'Untitled'
        by_day[day][project_name].append((task_name, hours))

    lines = []
    for day, projects in sorted(by_day.items()):
        day_total = Decimal(0)
        rows = []
        ordered_projects = sorted(projects.items(), key=lambda pair: project_sort_key(pair[0]))
        for project_name, logged in ordered_projects:
            # Collapse repeated task titles into one total per title.
            per_task = defaultdict(Decimal)
            for task_name, hours in logged:
                per_task[task_name] += Decimal(hours)
            project_total = sum(per_task.values()).quantize(tenth)
            day_total += project_total
            task_list = "; ".join(
                [f"{name.replace(';', ',')} [{spent}]" for name, spent in per_task.items()]
            )
            rows.append(f"|{project_name}|{task_list}|{project_total}|")
        if first_day != last_day:
            lines.append(f"## {day.strftime('%Y-%m-%d %A')} [{day_total}]\n")
        lines.append(table_header)
        lines.extend(rows)
        lines.append(f"|TOTAL| |{day_total}|\n")
        lines.append("")
    return "\n".join(lines)
#JSON
@time.get("/time/json")
async def get_timing_json(
    request: Request,
    start_date: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
    end_date: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
):
    """Serve the timing entries for a date range as nested JSON.

    Args:
        request: The incoming request (not read by this handler).
        start_date: First day of the range, ``YYYY-MM-DD``.
        end_date: Optional last day of the range, ``YYYY-MM-DD``.

    Returns:
        A JSON response holding the structure produced by ``process_json``.
    """
    start = localize_dt(start_date)
    # `end_date` is optional, so only localize it when it was supplied (the
    # /time/markdown3 handler guards the same way).
    end = localize_dt(end_date) if end_date else None
    timing_data = await fetch_and_prepare_timing_data(start, end)
    # Convert processed data to the required JSON structure
    json_data = process_json(timing_data)
    return JSONResponse(content=json_data)
def process_json(timing_data):
    """Group timing entries by date, then project title, then task title.

    Args:
        timing_data: Iterable of entry dicts. Each entry must carry
            ``start_date``; ``project`` (a mapping that may hold ``title``) and
            ``title`` are optional.

    Returns:
        A plain nested dict ``{date: {project: {task: [entry, ...]}}}``.
        Entries whose project has no title are filed under ``'No Project'``,
        and entries with a missing or empty title under ``'Untitled'``, the
        same fallbacks the Markdown formatters in this module use.
    """
    structured_data = {}
    for entry in timing_data:
        date_key = entry['start_date']  # Already in 'YYYY-MM-DD' format
        project = entry.get('project') or {}
        project_title = project['title'] if 'title' in project else 'No Project'
        task_title = entry.get('title') or 'Untitled'
        (
            structured_data
            .setdefault(date_key, {})
            .setdefault(project_title, {})
            .setdefault(task_title, [])
            .append(entry)
        )
    return structured_data
# ROCKETMATTER CSV PARSING
def load_project_names(filename):
    """Read the JSON document stored at ``filename`` (UTF-8) and return it."""
    with open(filename, encoding='utf-8') as handle:
        mappings = json.load(handle)
    return mappings
def parse_input(fields, project_name_mappings, start_times_by_date):
    """Turn one Rocketmatter CSV row into back-to-back time-entry dicts.

    Args:
        fields: The row's columns. Index 3 is the project code, 4 the task
            description text, 6 the billing date (``MM/DD/YYYY``) and 9 the
            total hours billed.
        project_name_mappings: Maps project codes to display names; unknown
            codes are used as-is.
        start_times_by_date: Maps a billing date to the time at which the next
            entry for that day should begin. Updated in place so that
            successive rows for the same day are laid end to end.

    Returns:
        A list of dicts with keys ``project``, ``Task``, ``Start_time`` and
        ``End_time``, one per task found in the description.

    Tasks are read from ``name [hours]`` / ``name (hours)`` fragments. Hours
    billed beyond the itemised ones become one extra task. A description with
    no itemised durations yields exactly one task covering the total hours.
    """
    project_code = fields[3].strip()
    project_name = project_name_mappings.get(project_code, project_code)
    task_descriptions = fields[4].strip()
    billing_date_str = fields[6].strip()
    total_hours = float(fields[9].strip())
    billing_date = datetime.strptime(billing_date_str, "%m/%d/%Y").date()

    # If no start time is recorded for this billing_date, default to 8 AM
    if billing_date not in start_times_by_date:
        start_time = pacific.localize(datetime.combine(billing_date, datetime.min.time()).replace(hour=8))
    else:
        start_time = start_times_by_date[billing_date]

    # Normalize separators: a closing bracket followed by whitespace and a new
    # task becomes "],", and line breaks become commas.
    task_descriptions = re.sub(r'(\)|\])(\s+|$)(?=\[|\(|[A-Za-z])', '],', task_descriptions)
    task_descriptions = re.sub(r'(\r?\n|\r)', ',', task_descriptions)

    # Matches a task description followed by its bracketed decimal duration.
    task_pattern = re.compile(r'(.*?)[\[\(](\d+\.\d+)[\]\)]\s*,?')
    tasks_with_durations = task_pattern.findall(task_descriptions)

    tasks = []
    if tasks_with_durations:
        total_calc_hours = 0
        for raw_name, raw_hours in tasks_with_durations:
            task_name = raw_name.strip(' ,;') or "Undefined Task"
            duration_hours = float(raw_hours)
            tasks.append((task_name, duration_hours))
            total_calc_hours += duration_hours

        # Hours not accounted for by the itemised tasks go to one extra task.
        # The small tolerance keeps float round-off (e.g. 0.7 + 0.2 + 0.1
        # against 1.0) from producing a spurious near-zero entry.
        remainder = total_hours - total_calc_hours
        if remainder > 1e-9:
            non_duration_task = task_pattern.sub('', task_descriptions).strip(' ,;')
            tasks.append((non_duration_task or "Undefined Task", remainder))
    else:
        # No itemised durations: the whole description is a single task. This
        # is an either/or with the remainder handling above, so the hours are
        # recorded once rather than twice.
        tasks.append((task_descriptions or "Undefined Task", total_hours))

    json_entries = []
    for task_name, duration_hours in tasks:
        end_time = start_time + timedelta(hours=duration_hours)
        # NOTE(review): the UTC offset is hard-coded to -07:00 in the format
        # string regardless of the date; confirm this is intended for dates
        # outside daylight-saving time.
        json_entries.append({
            "project": project_name,
            "Task": task_name,
            "Start_time": start_time.strftime("%Y-%m-%d %H:%M:%S-07:00"),
            "End_time": end_time.strftime("%Y-%m-%d %H:%M:%S-07:00"),
        })
        start_time = end_time

    # Remember where this day's entries ended so the next row continues there.
    start_times_by_date[billing_date] = start_time
    return json_entries
async def post_time_entry_to_timing(entry):
    """POST one parsed time entry to the Timing API.

    Args:
        entry: Dict with ``Start_time``, ``End_time``, ``project`` and
            ``Task`` keys, as produced by ``parse_input``.

    Returns:
        A ``(status_code, decoded_json_body)`` tuple from the API response.
    """
    url = f"{TIMING_API_URL}/time-entries"  # The URL for posting time entries
    headers = {
        "Authorization": f"Bearer {TIMING_API_KEY}",
        "Content-Type": "application/json",
        "Accept": "application/json",
        'X-Time-Zone': 'America/Los_Angeles'  # Set the timezone for the API request
    }
    data = {
        "start_date": entry["Start_time"],  # Format these according to the API's requirements
        "end_date": entry["End_time"],
        "project": entry["project"],
        "title": entry["Task"],
        "notes": "Automatically generated based on Rocketmatter reports.",
        "replace_existing": False
    }
    # The module-level httpx.post() is synchronous and returns a Response,
    # which cannot be awaited; the async client provides the awaitable call.
    async with httpx.AsyncClient() as client:
        response = await client.post(url, headers=headers, json=data)
    return response.status_code, response.json()
@time.get("/time/flagemoji/{country_code}")
def flag_emoji(country_code: str):
    """Build a flag emoji by shifting each character of ``country_code``.

    Every character of the upper-cased code is moved up by 127397 code points,
    which maps 'A' (65) onto U+1F1E6 (127462). No validation is performed on
    the input.

    Returns:
        ``{"emoji": <shifted string>}``.
    """
    code_point_shift = 127397
    symbols = [chr(ord(letter) + code_point_shift) for letter in country_code.upper()]
    return {"emoji": ''.join(symbols)}
@time.head("/time/")
async def read_root():
    """Answer HEAD requests on /time/ with an empty payload."""
    empty_payload = {}
    return empty_payload
@time.get("/time/")
async def root():
    """Answer GET requests on /time/ with a fixed greeting message."""
    greeting = "Ring, ring, ring, ring, ring, ring, ring. \n\n. Banana phone."
    return {"message": greeting}

406
sijapi/routers/tts.py Normal file
View file

@ -0,0 +1,406 @@
from fastapi import APIRouter, UploadFile, HTTPException, Response, Form, File, BackgroundTasks, Depends, Request
from fastapi.responses import Response, StreamingResponse, FileResponse
from fastapi.responses import StreamingResponse, PlainTextResponse
import requests
import json
import shutil
from io import BytesIO
import asyncio
from pydantic import BaseModel
from typing import Optional, Union, List
from pydub import AudioSegment
from TTS.api import TTS
from pathlib import Path
from datetime import datetime
from time import time
import torch
import traceback
import hashlib
import uuid
import httpx
import tempfile
import random
import re
import os
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOME_DIR, DATA_DIR, DEFAULT_VOICE, TTS_DIR, TTS_SEGMENTS_DIR, VOICE_DIR, PODCAST_DIR, TTS_OUTPUT_DIR, ELEVENLABS_API_KEY
from sijapi.utilities import sanitize_filename
### INITIALIZATIONS ###
# Router that the text-to-speech endpoints below are registered on.
tts = APIRouter(tags=["trusted", "private"])
# Torch device the local XTTS model is moved to via `.to(DEVICE)`; fixed to the CPU.
DEVICE = torch.device('cpu')
# Model identifier passed to `TTS(model_name=...)` when the local model is loaded.
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
@tts.get("/tts/local_voices", response_model=List[str])
async def list_wav_files():
    """List the local voices: the names of the ``.wav`` files in VOICE_DIR.

    Returns:
        The file names with only the final ``.wav`` extension removed, so a
        voice stored as ``my.voice.wav`` is reported as ``my.voice``. This is
        the name ``select_voice`` resolves back to ``{voice_name}.wav``.
    """
    return [Path(entry).stem for entry in os.listdir(VOICE_DIR) if entry.endswith(".wav")]
@tts.get("/tts/elevenlabs_voices")
async def list_11l_voices():
    """Return the ElevenLabs voices as plain text, one ``name: `id``` per line.

    Any failure while fetching or reading the voice list is logged, and
    whatever was collected up to that point (possibly nothing) is returned
    with status 200.
    """
    collected = []
    endpoint = "https://api.elevenlabs.io/v1/voices"
    auth_headers = {"xi-api-key": ELEVENLABS_API_KEY}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(endpoint, headers=auth_headers)
            DEBUG(f"Response: {response}")
            if response.status_code == 200:
                for voice in response.json().get("voices", []):
                    voice_name = voice["name"]
                    voice_id = voice["voice_id"]
                    collected.append(f"{voice_name}: `{voice_id}`\n")
        except Exception as e:
            ERR(f"Error determining voice ID: {str(e)}")
    return PlainTextResponse("".join(collected), status_code=200)
def select_voice(voice_name: str) -> str:
    """Resolve a local voice name to the path of its ``.wav`` file.

    Args:
        voice_name: Name of the voice; the file looked up is
            ``VOICE_DIR / "<voice_name>.wav"``.

    Returns:
        The path of the voice file as a string.

    Raises:
        HTTPException: 404 when the file does not exist or the lookup fails
            for any other reason (the cause is logged first).
    """
    try:
        candidate = VOICE_DIR / f"{voice_name}.wav"
        DEBUG(f"select_voice received query to use voice: {voice_name}. Looking for {candidate} inside {VOICE_DIR}.")
        if not candidate.is_file():
            raise HTTPException(status_code=404, detail="Voice file not found")
        return str(candidate)
    except Exception as error:
        ERR(f"Voice file not found: {str(error)}")
        ERR(traceback.format_exc())
        raise HTTPException(status_code=404, detail="Voice file not found")
@tts.post("/tts/speak")
@tts.post("/v1/audio/speech")
async def generate_speech_endpoint(
    request: Request,
    background_tasks: BackgroundTasks,
    model: str = Form("eleven_turbo_v2"),
    text: Optional[str] = Form(None),
    file: Optional[UploadFile] = File(None),
    voice: Optional[str] = Form(None),
    voice_file: Optional[UploadFile] = File(None),
    speed: Optional[float] = Form(1.1),
    podcast: Union[bool, str] = Form(False),
    stream: bool = Form(True)
):
    """Synthesize speech from submitted text, streamed or written to a file.

    Args:
        request: The incoming request (not read by this handler).
        background_tasks: Passed through to ``generate_speech``.
        model: TTS model; ``"eleven_turbo_v2"`` selects ElevenLabs.
        text: Text to speak; ignored when ``file`` is given.
        file: Uploaded UTF-8 text file to speak.
        voice: Voice name.
        voice_file: Uploaded voice sample for local synthesis.
        speed: Speaking speed for local synthesis.
        podcast: Whether to also export to the podcast directory; the string
            ``"true"`` (any case) counts as true.
        stream: Stream the audio back when true, otherwise generate a file.

    Returns:
        A streaming audio response when ``stream`` is true, otherwise the
        result of ``generate_speech``.

    Raises:
        HTTPException: Errors raised deliberately further down (for example
            400 "No text provided" or 404 "Voice file not found") keep their
            status; any other failure is reported as a 500.
    """
    try:
        podcast = podcast if isinstance(podcast, bool) else podcast.lower() == 'true'
        text_content = await get_text_content(text, file)
        if stream:
            model = model if model else await get_model(voice, voice_file)
            if model == "eleven_turbo_v2":
                voice_id = await determine_voice_id(voice)
                audio_stream = await get_audio_stream(model, text_content, voice_id)
                return StreamingResponse(audio_stream, media_type="audio/mpeg")
            else:
                return await stream_tts(text_content, speed, voice, voice_file)
        else:
            return await generate_speech(background_tasks, text_content, voice, voice_file, model, speed, podcast)
    except HTTPException as e:
        # Keep the status chosen where the error was raised instead of
        # masking it behind a generic failure.
        ERR(f"Error in TTS: {str(e)}")
        raise
    except Exception as e:
        ERR(f"Error in TTS: {str(e)}")
        ERR(traceback.format_exc())
        # 666 is not a registered HTTP status code; report unexpected
        # failures as a standard internal server error.
        raise HTTPException(status_code=500, detail="error in TTS")
async def generate_speech(
    background_tasks: BackgroundTasks,
    text: str,
    voice: str = None,
    voice_file: UploadFile = None,
    model: str = None,
    speed: float = 1.1,
    podcast: bool = False,
    title: str = None,
    output_dir = None
) -> str:
    """Generate an audio file for ``text`` and return its path.

    Args:
        background_tasks: Receives a task that deletes the file produced by
            local (XTTS) synthesis.
        text: Text to speak.
        voice: Voice name.
        voice_file: Uploaded voice sample for local synthesis.
        model: ``"eleven_turbo_v2"`` or ``"xtts"``; chosen by ``get_model``
            when empty.
        speed: Speaking speed for local synthesis.
        podcast: Forwarded to ``local_tts``.
        title: Optional title used when naming the output file.
        output_dir: Output directory; TTS_OUTPUT_DIR when empty. Created if it
            does not exist.

    Returns:
        The path of the generated audio file as a string.

    Raises:
        HTTPException: 400 for an unrecognized model; errors raised by the
            helpers are logged and re-raised unchanged.
    """
    target_dir = TTS_OUTPUT_DIR if not output_dir else Path(output_dir)
    if not target_dir.exists():
        target_dir.mkdir(parents=True)

    try:
        chosen_model = model or await get_model(voice, voice_file)

        if chosen_model == "eleven_turbo_v2":
            INFO(f"Using ElevenLabs.")
            mp3_path = await elevenlabs_tts(chosen_model, text, voice, title, target_dir)
            return str(mp3_path)

        if chosen_model == "xtts":
            INFO(f"Using XTTS2")
            wav_path = await local_tts(text, speed, voice, voice_file, podcast, background_tasks, title, target_dir)
            # The locally generated file is scheduled for removal as a background task.
            background_tasks.add_task(os.remove, str(wav_path))
            return str(wav_path)

        raise HTTPException(status_code=400, detail="Invalid model specified")
    except HTTPException as http_error:
        ERR(f"HTTP error: {http_error}")
        ERR(traceback.format_exc())
        raise
    except Exception as error:
        ERR(f"Error: {error}")
        ERR(traceback.format_exc())
        raise
async def get_model(voice: str = None, voice_file: UploadFile = None):
    """Choose the TTS backend for the given voice or uploaded voice sample.

    Args:
        voice: Voice name, either a local voice or an ElevenLabs voice.
        voice_file: Uploaded voice sample; always selects local synthesis.

    Returns:
        ``"xtts"`` when a voice sample was uploaded or ``voice`` names a local
        voice file, otherwise ``"eleven_turbo_v2"`` when ``voice`` resolves to
        an ElevenLabs voice id.

    Raises:
        HTTPException: 400 when neither a voice nor a voice file is usable.
    """
    if voice_file:
        return "xtts"
    if voice:
        try:
            if select_voice(voice):
                return "xtts"
        except HTTPException:
            # select_voice reports "no such local voice" by raising a 404.
            # That is not an error here: fall through to ElevenLabs.
            pass
        if await determine_voice_id(voice):
            return "eleven_turbo_v2"
    raise HTTPException(status_code=400, detail="No model or voice specified")
async def determine_voice_id(voice_name: str) -> str:
    """Resolve a voice name (or id) to an ElevenLabs voice id.

    The built-in table is consulted first. Otherwise the ElevenLabs voice list
    is fetched and searched for a voice whose id or name equals
    ``voice_name``. If nothing matches, or the lookup fails (the error is
    logged), a fixed fallback id is returned.
    """
    fallback_voice_id = "6TayTBKLMOsghG7jYuMX"
    known_voices = {
        "alloy": "E3A1KVbKoWSIKSZwSUsW",
        "echo": "b42GBisbu9r5m5n6pHF7",
        "fable": "KAX2Y6tTs0oDWq7zZXW7",
        "onyx": "clQb8NxY08xZ6mX6wCPE",
        "nova": "6TayTBKLMOsghG7jYuMX",
        "shimmer": "E7soeOyjpmuZFurvoxZ2",
        DEFAULT_VOICE: "6TayTBKLMOsghG7jYuMX",
        "Sangye": "E7soeOyjpmuZFurvoxZ2",
        "Herzog": "KAX2Y6tTs0oDWq7zZXW7",
        "Attenborough": "b42GBisbu9r5m5n6pHF7"
    }

    try:
        known_id = known_voices[voice_name]
    except KeyError:
        DEBUG(f"Requested voice not among the hardcoded options.. checking with 11L next.")
    else:
        DEBUG(f"Found voice ID - {known_id}")
        return known_id

    endpoint = "https://api.elevenlabs.io/v1/voices"
    auth_headers = {"xi-api-key": ELEVENLABS_API_KEY}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(endpoint, headers=auth_headers)
            DEBUG(f"Response: {response}")
            if response.status_code == 200:
                for candidate in response.json().get("voices", []):
                    if voice_name == candidate["voice_id"] or voice_name == candidate["name"]:
                        return candidate["voice_id"]
        except Exception as e:
            ERR(f"Error determining voice ID: {str(e)}")
    return fallback_voice_id
async def elevenlabs_tts(model: str, input_text: str, voice: str, title: str = None, output_dir: str = None):
    """Synthesize ``input_text`` with ElevenLabs and save the result as an MP3.

    Args:
        model: ElevenLabs model id sent with the request.
        input_text: Text to speak.
        voice: Voice name or id, resolved with ``determine_voice_id``.
        title: Base name for the file; a ``%Y%m%d%H%M%S`` timestamp when empty.
        output_dir: Target directory; TTS_OUTPUT_DIR when empty.

    Returns:
        The ``Path`` of the written MP3 file.

    Raises:
        HTTPException: With the API's status code when it is not 200.
    """
    voice_id = await determine_voice_id(voice)
    endpoint = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    request_body = {
        "text": input_text,
        "model_id": model
    }
    request_headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
    async with httpx.AsyncClient() as client:
        response = await client.post(endpoint, json=request_body, headers=request_headers)
        destination_dir = output_dir or TTS_OUTPUT_DIR
        stem = title or datetime.now().strftime("%Y%m%d%H%M%S")
        file_path = Path(destination_dir) / f"{sanitize_filename(stem)}.mp3"
        if response.status_code != 200:
            raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
        with open(file_path, "wb") as audio_file:
            audio_file.write(response.content)
        return file_path
async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> str:
    """Return the text to speak, preferring an uploaded file over ``text``.

    The file is decoded as UTF-8; either source is stripped of surrounding
    whitespace.

    Raises:
        HTTPException: 400 when neither a file nor non-empty text is given.
    """
    if file:
        raw_bytes = await file.read()
        return raw_bytes.decode("utf-8").strip()
    if text:
        return text.strip()
    raise HTTPException(status_code=400, detail="No text provided")
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
    """Return the path of the voice sample to use for local synthesis.

    Args:
        voice: Name of a local voice; takes precedence when given.
        voice_file: Uploaded voice sample. It is stored in VOICE_DIR unless a
            file with the same name and identical content is already there.

    Returns:
        Path of the voice file as a string. Falls back to DEFAULT_VOICE when
        neither argument is usable.

    Raises:
        HTTPException: 404 (from ``select_voice``) when a named voice has no
            file.
    """
    if voice:
        return select_voice(voice)

    if voice_file and isinstance(voice_file, UploadFile):
        VOICE_DIR.mkdir(exist_ok=True)
        content = await voice_file.read()
        checksum = hashlib.md5(content).hexdigest()

        # The filename is supplied by the client. Keep only its final path
        # component so the upload cannot be written outside VOICE_DIR, and
        # fall back to a fixed name when nothing usable remains.
        safe_name = Path(voice_file.filename or "").name
        if safe_name in ("", ".", ".."):
            safe_name = "uploaded_voice.wav"
        existing_file = VOICE_DIR / safe_name

        if existing_file.is_file():
            with open(existing_file, 'rb') as f:
                existing_checksum = hashlib.md5(f.read()).hexdigest()
            if checksum == existing_checksum:
                # Same name and same bytes: reuse the stored copy.
                return str(existing_file)

        # Same name but different content (or a new name): pick the first
        # free "<stem>NN.wav" so existing samples are never overwritten.
        base_name = existing_file.stem
        counter = 1
        new_file = existing_file
        while new_file.is_file():
            new_file = VOICE_DIR / f"{base_name}{counter:02}.wav"
            counter += 1
        with open(new_file, 'wb') as f:
            f.write(content)
        return str(new_file)

    DEBUG(f"{datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}")
    return select_voice(DEFAULT_VOICE)
async def local_tts(text_content: str, speed: float, voice: str, voice_file = None, podcast: bool = False, background_tasks: BackgroundTasks = None, title: str = None, output_path: Optional[Path] = None) -> str:
    """Synthesize ``text_content`` locally with XTTS and write one WAV file.

    Args:
        text_content: Text to speak; split into segments by ``split_text``.
        speed: Speaking speed passed to the model.
        voice: Name of a local voice.
        voice_file: Uploaded voice sample (used when ``voice`` is empty).
        podcast: When true, the audio is additionally exported to PODCAST_DIR
            under the same file name.
        background_tasks: Accepted for interface compatibility; not used here.
        title: Used in the generated file name when no explicit file path is
            given.
        output_path: Either the full path of the file to write, or an existing
            directory in which a ``<timestamp>_<title>.wav`` file is created.
            TTS_OUTPUT_DIR is used when empty.

    Returns:
        The path of the written WAV file as a string.
    """
    requested = Path(output_path) if output_path else None
    if requested is not None and not requested.is_dir():
        file_path = requested
    else:
        # generate_speech hands over its output *directory*; exporting to a
        # directory path would fail, so build the file name inside it.
        base_dir = requested if requested is not None else TTS_OUTPUT_DIR
        datetime_str = datetime.now().strftime("%Y%m%d%H%M%S")
        title = sanitize_filename(title) if title else "Audio"
        filename = f"{datetime_str}_{title}.wav"
        file_path = base_dir / filename

    # Ensure the parent directory exists
    file_path.parent.mkdir(parents=True, exist_ok=True)

    voice_file_path = await get_voice_file_path(voice, voice_file)
    XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
    segments = split_text(text_content)
    combined_audio = AudioSegment.silent(duration=0)

    for i, segment in enumerate(segments):
        segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
        DEBUG(f"Segment file path: {segment_file_path}")
        # Synthesis is blocking, so it runs in a worker thread.
        segment_file = await asyncio.to_thread(XTTS.tts_to_file, text=segment, speed=speed, file_path=str(segment_file_path), speaker_wav=[voice_file_path], language="en")
        DEBUG(f"Segment file generated: {segment_file}")
        combined_audio += AudioSegment.from_wav(str(segment_file))
        # Delete the segment file immediately after adding it to the combined audio
        segment_file_path.unlink()

    if podcast:
        podcast_file_path = PODCAST_DIR / file_path.name
        combined_audio.export(podcast_file_path, format="wav")

    combined_audio.export(file_path, format="wav")
    return str(file_path)
async def stream_tts(text_content: str, speed: float, voice: str, voice_file) -> StreamingResponse:
    """Stream locally synthesized speech, one text segment at a time.

    Args:
        text_content: Text to speak; split into segments by ``split_text``.
        speed: Speaking speed passed to ``generate_tts``.
        voice: Name of a local voice.
        voice_file: Uploaded voice sample (used when ``voice`` is empty).

    Returns:
        A ``StreamingResponse`` (``audio/wav``) that yields each segment's
        file content in 1024-byte chunks as soon as it has been generated.
    """
    voice_file_path = await get_voice_file_path(voice, voice_file)
    segments = split_text(text_content)

    async def audio_stream_generator():
        # NOTE(review): each segment is written by generate_tts as its own
        # .wav file and the files are sent back to back; confirm clients cope
        # with the concatenation.
        for segment in segments:
            segment_file = await generate_tts(segment, speed, voice_file_path)
            try:
                with open(segment_file, 'rb') as f:
                    while chunk := f.read(1024):
                        yield chunk
            finally:
                # Also runs when the stream is abandoned mid-segment (for
                # example on client disconnect), so no temp file is left.
                os.remove(segment_file)

    return StreamingResponse(audio_stream_generator(), media_type='audio/wav')
async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
    """Synthesize ``text`` with the local XTTS model into a temporary WAV file.

    Args:
        text: Text to speak.
        speed: Speaking speed passed to the model.
        voice_file_path: Path of the speaker sample.

    Returns:
        Path of the generated ``.wav`` file in the system temp directory. The
        caller is responsible for deleting it.
    """
    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name that another process could claim before it is used.
    fd, output_path = tempfile.mkstemp(suffix=".wav", dir=tempfile.gettempdir())
    os.close(fd)
    try:
        XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
        # Synthesis is blocking; run it in a worker thread as local_tts does
        # so the event loop stays responsive.
        await asyncio.to_thread(XTTS.tts_to_file, text=text, speed=speed, file_path=output_path, speaker_wav=[voice_file_path], language="en")
    except Exception:
        # Do not leave the placeholder file behind when synthesis fails.
        os.remove(output_path)
        raise
    return output_path
async def get_audio_stream(model: str, input_text: str, voice: str):
    """Request speech from ElevenLabs and return an iterator over the audio.

    Args:
        model: ElevenLabs model id; ``"eleven_turbo_v2"`` when empty.
        input_text: Text to speak.
        voice: Voice name or id, resolved with ``determine_voice_id``.

    Returns:
        An iterator yielding the response body in chunks of up to 1024 bytes.

    Raises:
        HTTPException: With the API's status code when it is not 200.
    """
    voice_id = await determine_voice_id(voice)
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    payload = {
        "text": input_text,
        # Honour the requested model, as elevenlabs_tts does, keeping the
        # previous fixed value as the default.
        "model_id": model or "eleven_turbo_v2"
    }
    headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
    # requests is blocking, so issue the call from a worker thread.
    # stream=True keeps the body unread until iter_content() is consumed,
    # instead of buffering the whole audio file first.
    response = await asyncio.to_thread(requests.post, url, json=payload, headers=headers, stream=True)
    if response.status_code == 200:
        return response.iter_content(1024)
    status_code = response.status_code
    response.close()
    raise HTTPException(status_code=status_code, detail="Error from ElevenLabs API")
def split_text(text, target_length=35, max_length=50):
    """Split text into segments of whole sentences, at most ``max_length`` words.

    Args:
        text: Text to split; cleaned with ``clean_text_for_tts`` first.
        target_length: Currently unused; kept for interface compatibility.
        max_length: Word limit per segment. A single sentence longer than the
            limit becomes a segment of its own rather than being cut.

    Returns:
        List of non-empty segment strings; empty when there is no text.
    """
    text = clean_text_for_tts(text)
    if not text:
        # Nothing to speak (None or empty after cleaning).
        return []

    sentences = re.split(r'(?<=[.!?"])\s+', text)
    segments = []
    current_segment = []  # words of the segment being built

    for sentence in sentences:
        sentence_words = sentence.split()
        # Flush only when something has been collected: an over-long first
        # sentence must not produce an empty segment.
        if current_segment and len(current_segment) + len(sentence_words) > max_length:
            segments.append(' '.join(current_segment))
            DEBUG(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
            current_segment = list(sentence_words)
        else:
            current_segment.extend(sentence_words)

    if current_segment:
        segments.append(' '.join(current_segment))
        DEBUG(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")

    return segments
def clean_text_for_tts(text: str) -> str:
    """Normalize text for speech synthesis.

    Line breaks become spaces, every character other than word characters,
    whitespace and ``. , ; : ! ? ' "`` is removed, and runs of whitespace are
    collapsed to single spaces with the ends trimmed.

    Returns:
        The cleaned text, or ``None`` (after a debug log) when ``text`` is
        ``None``.
    """
    if text is None:
        DEBUG(f"No text received.")
        return None

    flattened = text.replace("\n", " ").replace("\r", " ")
    allowed_only = re.sub(r"[^\w\s.,;:!?'\"]", '', flattened)
    return re.sub(r'\s+', ' ', allowed_only).strip()
def copy_to_podcast_dir(file_path):
    """Copy ``file_path`` into PODCAST_DIR under the same file name.

    The outcome is reported with ``print``; failures are reported the same
    way and never raised.
    """
    try:
        # Same file name, placed inside PODCAST_DIR.
        target = PODCAST_DIR / Path(file_path).name
        shutil.copy(file_path, target)
        print(f"File copied successfully to {target}")
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except shutil.SameFileError:
        print(f"Source and destination are the same file: {file_path}")
    except PermissionError:
        print(f"Permission denied while copying the file: {file_path}")
    except Exception as error:
        print(f"An error occurred while copying the file: {file_path}")
        print(f"Error details: {str(error)}")

265
sijapi/routers/weather.py Normal file
View file

@ -0,0 +1,265 @@
import asyncio
from fastapi import APIRouter, HTTPException
from fastapi import HTTPException
from asyncpg.cursor import Cursor
from httpx import AsyncClient
from typing import Dict
from datetime import datetime
from shapely.wkb import loads
from binascii import unhexlify
from sijapi.utilities import localize_dt
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import VISUALCROSSING_API_KEY, TZ
from sijapi.utilities import get_db_connection, haversine
from sijapi.routers import locate
# Router object for this module; no routes are registered on it in the code shown here.
weather = APIRouter()
async def get_weather(date_time: datetime, latitude: float, longitude: float):
    """Return weather for a time and place, refreshing from Visual Crossing if needed.

    A record is first looked up with ``get_weather_from_db``. It is reused only
    when all of the checks below pass; otherwise that day's data is requested
    from Visual Crossing, handed to ``store_weather_to_db`` and read back from
    the database.

    Args:
        date_time: The moment the weather is wanted for.
        latitude: Latitude of the requested location.
        longitude: Longitude of the requested location.

    Returns:
        The dict produced by ``get_weather_from_db``. When the refresh does not
        succeed, whatever the most recent lookup returned (possibly ``None``).
    """
    DEBUG(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}")
    daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
    # Assume a refresh is needed until the stored record proves usable.
    fetch_new_data = True
    if daily_weather_data:
        try:
            DEBUG(f"Daily weather data from db: {daily_weather_data}")
            last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
            last_updated = localize_dt(last_updated)
            # The stored location is hex text; decode it and parse it with shapely's WKB loader.
            stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
            stored_loc = loads(stored_loc_data)
            stored_lat = stored_loc.y
            stored_lon = stored_loc.x
            stored_ele = stored_loc.z
            hourly_weather = daily_weather_data.get('HourlyWeather')
            DEBUG(f"Hourly: {hourly_weather}")
            DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n")
            request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
            DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
            # Reuse the stored record only if the requested time is not in the
            # future, the record was updated after the requested time, the
            # haversine result is below 8 (unit defined by the helper, not
            # visible here), and hourly rows exist.
            if last_updated and (date_time <= datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
                DEBUG(f"We can use existing data... :')")
                fetch_new_data = False
        except Exception as e:
            # Any problem while inspecting the stored record leaves fetch_new_data True.
            ERR(f"Error in get_weather: {e}")
    if fetch_new_data:
        DEBUG(f"We require new data!")
        request_date_str = date_time.strftime("%Y-%m-%d")
        # Single-day timeline request (start and end date are the same), US units.
        url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
        try:
            async with AsyncClient() as client:
                response = await client.get(url)
                if response.status_code == 200:
                    DEBUG(f"Successfully obtained data from VC...")
                    try:
                        weather_data = response.json()
                        store_result = await store_weather_to_db(date_time, weather_data)
                        if store_result == "SUCCESS":
                            DEBUG(f"New weather data for {request_date_str} stored in database...")
                        else:
                            ERR(f"Failed to store weather data for {request_date_str} in database! {store_result}")
                        # Read the record back so the caller gets the same shape as a stored hit.
                        DEBUG(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}")
                        daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
                        if daily_weather_data is not None:
                            return daily_weather_data
                        else:
                            raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
                    except Exception as e:
                        ERR(f"Problem parsing VC response or storing data: {e}")
                        raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
                else:
                    ERR(f"Failed to fetch weather data: {response.status_code}, {response.text}")
        except Exception as e:
            # NOTE(review): HTTPException is an Exception subclass, so the 500s
            # raised in the inner block are caught here and only logged; the
            # function then falls through to the return below. Confirm whether
            # those errors were meant to reach the caller.
            ERR(f"Exception during API call: {e}")
    return daily_weather_data
async def store_weather_to_db(date_time: datetime, weather_data: dict):
    """Insert one day of Visual Crossing data into DailyWeather and HourlyWeather.

    Args:
        date_time: The requested moment; its date is combined with each hourly
            entry's time string to build the hourly timestamps.
        weather_data: Decoded API response. The first element of ``days`` is
            stored, together with the top-level ``longitude`` / ``latitude``.

    Returns:
        ``"SUCCESS"`` when the daily insert went through (failures of
        individual hourly rows are only logged), otherwise ``None`` after the
        error has been logged.
    """
    # NOTE(review): the connection obtained here is never closed in this
    # function; confirm whether get_db_connection expects callers to do so.
    conn = await get_db_connection()
    try:
        day_data = weather_data.get('days')[0]
        DEBUG(f"day_data.get('sunrise'): {day_data.get('sunrise')}")
        # Handle preciptype and stations as PostgreSQL arrays
        preciptype_array = day_data.get('preciptype', []) or []
        stations_array = day_data.get('stations', []) or []
        date_str = date_time.strftime("%Y-%m-%d")
        # Get location details from weather data if available
        longitude = weather_data.get('longitude')
        latitude = weather_data.get('latitude')
        # Elevation comes from the locate router's helper, not from the API response.
        elevation = locate.get_elevation(latitude, longitude)
        # The point is only built when all three values are truthy (so a
        # coordinate or elevation of 0 yields None).
        location_point = f"POINTZ({longitude} {latitude} {elevation})" if longitude and latitude and elevation else None
        # Correct for the datetime objects: the day's 'datetime' is localized,
        # then sunrise/sunset are placed on that day from their "HH:MM..." strings.
        day_data['datetime'] = localize_dt(day_data.get('datetime'))
        day_data['sunrise'] = day_data['datetime'].replace(hour=int(day_data.get('sunrise').split(':')[0]), minute=int(day_data.get('sunrise').split(':')[1]))
        day_data['sunset'] = day_data['datetime'].replace(hour=int(day_data.get('sunset').split(':')[0]), minute=int(day_data.get('sunset').split(':')[1]))
        # Order must match the column list of the INSERT below ($1..$38).
        daily_weather_params = (
            day_data.get('sunrise'), day_data.get('sunriseEpoch'),
            day_data.get('sunset'), day_data.get('sunsetEpoch'),
            day_data.get('description'), day_data.get('tempmax'),
            day_data.get('tempmin'), day_data.get('uvindex'),
            day_data.get('winddir'), day_data.get('windspeed'),
            day_data.get('icon'), datetime.now(),
            day_data.get('datetime'), day_data.get('datetimeEpoch'),
            day_data.get('temp'), day_data.get('feelslikemax'),
            day_data.get('feelslikemin'), day_data.get('feelslike'),
            day_data.get('dew'), day_data.get('humidity'),
            day_data.get('precip'), day_data.get('precipprob'),
            day_data.get('precipcover'), preciptype_array,
            day_data.get('snow'), day_data.get('snowdepth'),
            day_data.get('windgust'), day_data.get('pressure'),
            day_data.get('cloudcover'), day_data.get('visibility'),
            day_data.get('solarradiation'), day_data.get('solarenergy'),
            day_data.get('severerisk', 0), day_data.get('moonphase'),
            day_data.get('conditions'), stations_array, day_data.get('source'),
            location_point
        )
    except Exception as e:
        # NOTE(review): execution continues after this log. If preparation
        # failed, the names used below are undefined and the resulting error
        # is reported by the handler at the end of the function.
        ERR(f"Failed to prepare database query in store_weather_to_db! {e}")
    try:
        daily_weather_query = '''
INSERT INTO DailyWeather (
sunrise, sunriseEpoch, sunset, sunsetEpoch, description,
tempmax, tempmin, uvindex, winddir, windspeed, icon, last_updated,
datetime, datetimeEpoch, temp, feelslikemax, feelslikemin, feelslike,
dew, humidity, precip, precipprob, precipcover, preciptype,
snow, snowdepth, windgust, pressure, cloudcover, visibility,
solarradiation, solarenergy, severerisk, moonphase, conditions,
stations, source, location
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38)
RETURNING id
'''
        # Debug logs for better insights
        # DEBUG("Executing query: %s", daily_weather_query)
        # DEBUG("With parameters: %s", daily_weather_params)
        # Execute the query to insert daily weather data
        async with conn.transaction():
            daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params)
        # NOTE(review): block nesting from here on was reconstructed from a
        # listing without indentation; confirm the hourly inserts are meant to
        # run after (not inside) the daily transaction.
        if 'hours' in day_data:
            for hour_data in day_data['hours']:
                try:
                    await asyncio.sleep(0.1)
                    # hour_data['datetime'] = parse_date(hour_data.get('datetime'))
                    # The hourly 'datetime' is appended to the requested date
                    # to form a full timestamp before localizing.
                    hour_timestamp = date_str + ' ' + hour_data['datetime']
                    hour_data['datetime'] = localize_dt(hour_timestamp)
                    DEBUG(f"Processing hours now...")
                    # DEBUG(f"Processing {hour_data['datetime']}")
                    hour_preciptype_array = hour_data.get('preciptype', []) or []
                    hour_stations_array = hour_data.get('stations', []) or []
                    # Order must match the column list of the INSERT below ($1..$26).
                    hourly_weather_params = (
                        daily_weather_id,
                        hour_data['datetime'],
                        hour_data.get('datetimeEpoch'),
                        hour_data['temp'],
                        hour_data['feelslike'],
                        hour_data['humidity'],
                        hour_data['dew'],
                        hour_data['precip'],
                        hour_data['precipprob'],
                        hour_preciptype_array,
                        hour_data['snow'],
                        hour_data['snowdepth'],
                        hour_data['windgust'],
                        hour_data['windspeed'],
                        hour_data['winddir'],
                        hour_data['pressure'],
                        hour_data['cloudcover'],
                        hour_data['visibility'],
                        hour_data['solarradiation'],
                        hour_data['solarenergy'],
                        hour_data['uvindex'],
                        hour_data.get('severerisk', 0),
                        hour_data['conditions'],
                        hour_data['icon'],
                        hour_stations_array,
                        hour_data.get('source', ''),
                    )
                    try:
                        hourly_weather_query = '''
INSERT INTO HourlyWeather (daily_weather_id, datetime, datetimeEpoch, temp, feelslike, humidity, dew, precip, precipprob,
preciptype, snow, snowdepth, windgust, windspeed, winddir, pressure, cloudcover, visibility, solarradiation, solarenergy,
uvindex, severerisk, conditions, icon, stations, source)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26)
RETURNING id
'''
                        # Debug logs for better insights
                        # DEBUG("Executing query: %s", hourly_weather_query)
                        # DEBUG("With parameters: %s", hourly_weather_params)
                        # Execute the query to insert hourly weather data
                        async with conn.transaction():
                            hourly_weather_id = await conn.fetchval(hourly_weather_query, *hourly_weather_params)
                        # ERR(f"\n{hourly_weather_id}")
                    except Exception as e:
                        # A failed hourly insert is logged; the loop carries on with the next hour.
                        ERR(f"EXCEPTION: {e}")
                except Exception as e:
                    # Problems preparing this hour's row are logged and the hour is skipped.
                    ERR(f"EXCEPTION: {e}")
        return "SUCCESS"
    except Exception as e:
        ERR(f"Error in dailyweather storage: {e}")
async def get_weather_from_db(date_time: datetime, latitude: float, longitude: float):
    """Load the stored weather for a date near the given coordinates.

    Args:
        date_time: Moment of interest; only its date is used for the lookup.
        latitude: Latitude of the requested location.
        longitude: Longitude of the requested location.

    Returns:
        ``{'DailyWeather': {...}, 'HourlyWeather': [{...}, ...]}`` for the
        closest DailyWeather row of that date within 8046.72 (per the query's
        ``ST_DWithin`` on geography values) of the point, or ``None`` when no
        row matches or the query fails (the error is logged).
    """
    query_date = date_time.date()
    conn = await get_db_connection()
    try:
        # Query to get daily weather data
        query = '''
SELECT DW.* FROM DailyWeather DW
WHERE DW.datetime::date = $1
AND ST_DWithin(DW.location::geography, ST_MakePoint($2,$3)::geography, 8046.72)
ORDER BY ST_Distance(DW.location, ST_MakePoint($4, $5)::geography) ASC
LIMIT 1
'''
        daily_weather_data = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude)
        if daily_weather_data is None:
            DEBUG(f"No daily weather data retrieved from database.")
            return None

        # Query to get hourly weather data
        query = '''
SELECT HW.* FROM HourlyWeather HW
WHERE HW.daily_weather_id = $1
'''
        hourly_weather_data = await conn.fetch(query, daily_weather_data['id'])

        day: Dict = {
            'DailyWeather': dict(daily_weather_data),
            'HourlyWeather': [dict(row) for row in hourly_weather_data],
        }
        return day
    except Exception as e:
        ERR(f"Unexpected error occurred: {e}")
        return None
    finally:
        # Release the connection on every path so repeated lookups do not
        # accumulate open connections.
        # NOTE(review): assumes get_db_connection returns a dedicated
        # connection that the caller owns - confirm.
        if conn is not None:
            await conn.close()

427
sijapi/utilities.py Normal file
View file

@ -0,0 +1,427 @@
import re
import os
from fastapi import Form
import re
import io
from io import BytesIO
import base64
import math
from dateutil import parser
from pathlib import Path
import filetype
from PyPDF2 import PdfReader
from pdfminer.high_level import extract_text as pdfminer_extract_text
import pytesseract
from pdf2image import convert_from_path
from datetime import datetime, date, time
from typing import Optional, Union, Tuple
import asyncio
from PIL import Image
from dateutil.parser import parse as dateutil_parse
from docx import Document
import asyncpg
from sshtunnel import SSHTunnelForwarder
from fastapi import Depends, HTTPException, Request, UploadFile
from fastapi.security.api_key import APIKeyHeader
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import DB, GLOBAL_API_KEY, DB, DB_HOST, DB_PORT, DB_USER, DB_PASS, TZ, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR
# FastAPI security dependency that supplies the raw "Authorization" header value.
api_key_header = APIKeyHeader(name="Authorization")


def validate_api_key(request: Request, api_key: str = Depends(api_key_header)):
    """
    Reject the request with HTTP 401 unless it carries the global API key.

    Requests for "/health", "/ip" and "/pgp" are always let through. For any
    other path the key may be supplied in the ``Authorization`` header
    (optionally prefixed with "Bearer ") or in the ``api_key`` query parameter.

    Args:
        request: Incoming request; its URL path and query string are inspected.
        api_key: Raw ``Authorization`` header value injected through
            ``api_key_header``.

    Raises:
        HTTPException: 401 when neither the header nor the query parameter
            matches ``GLOBAL_API_KEY``.
    """
    # NOTE(review): APIKeyHeader is built with its default auto_error setting,
    # which presumably rejects requests lacking the header before this
    # function runs, so the query-parameter route may only be reachable when
    # some Authorization header is also sent - confirm.
    if request.url.path in ["/health", "/ip", "/pgp"]:
        return

    header_ok = False
    # Test the injected header value itself; `api_key_header` is the
    # module-level dependency object and is always truthy.
    if api_key:
        # Legacy form, kept so every previously accepted header still works:
        # lower-case everything and take the text after the last "bearer ".
        legacy_token = api_key.lower().split("bearer ")[-1]
        # Case-preserving form, so a key containing upper-case characters
        # can match (the legacy form mangles it).
        exact_token = api_key[7:] if api_key[:7].lower() == "bearer " else api_key
        header_ok = GLOBAL_API_KEY in (legacy_token, exact_token)

    api_key_query = request.query_params.get("api_key")
    # A missing query parameter must not count as a match when GLOBAL_API_KEY
    # is itself unset (None == None would otherwise authorise the request).
    query_ok = api_key_query is not None and api_key_query == GLOBAL_API_KEY

    if not (header_ok or query_ok):
        raise HTTPException(status_code=401, detail="Invalid or missing API key")
def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str = None, extension: str = None, no_timestamp: bool = False) -> Tuple[Path, Path]:
    """
    Obsidian helper: build a dated journal path and create its parent directory.

    The path always starts with ``journal/<year>/<month>/<day>``, each part
    formatted from ``date_time`` with YEAR_FMT, MONTH_FMT and DAY_FMT.

    * With no ``subdir``, ``filename`` or ``extension`` the result is the
      daily note ``<day>.md`` inside that directory.
    * ``subdir`` adds a ``"<short day> <subdir>"`` directory level.
    * ``filename`` is sanitized and prefixed with the short day and, unless
      ``no_timestamp`` is set, an ``HHMMSS`` timestamp. When ``extension`` is
      also given it is appended (with a leading dot) unless the name already
      ends with it.

    Returns:
        ``(absolute_path, relative_path)`` where the absolute path is
        ``OBSIDIAN_VAULT_DIR / relative_path``.
    """
    year_part = date_time.strftime(YEAR_FMT)
    month_part = date_time.strftime(MONTH_FMT)
    day_part = date_time.strftime(DAY_FMT)
    short_day = date_time.strftime(DAY_SHORT_FMT)
    clock_stamp = date_time.strftime("%H%M%S")

    relative_path = Path("journal") / year_part / month_part / day_part

    if not (subdir or filename or extension):
        # Only a date was supplied: point at the standard daily note.
        relative_path = relative_path / f"{day_part}.md"
    else:
        if subdir:
            # Date-stamped subdirectory.
            relative_path = relative_path / f"{short_day} {subdir}"

        if filename:
            name_parts = [short_day] if no_timestamp else [short_day, clock_stamp]
            name_parts.append(sanitize_filename(filename))
            filename = " ".join(name_parts)

            if extension:
                if not extension.startswith("."):
                    extension = f".{extension}"
                if not filename.endswith(extension):
                    filename = f"{filename}{extension}"

            relative_path = relative_path / filename

    absolute_path = OBSIDIAN_VAULT_DIR / relative_path
    # Make sure the containing directory exists before handing the path back.
    os.makedirs(absolute_path.parent, exist_ok=True)
    return absolute_path, relative_path
def prefix_lines(text: str, prefix: str = '> ') -> str:
    """
    Put ``prefix`` in front of every line of ``text``.

    Each line loses its leading whitespace before the prefix is added; lines
    are split and re-joined on '\\n'.
    """
    return '\n'.join(f"{prefix}{row.lstrip()}" for row in text.split('\n'))
def f(file):
    """
    Normalise a "file" argument into something that can be read.

    * Objects with a callable ``read`` attribute are returned unchanged.
    * ``bytes`` / ``bytearray`` values are returned unchanged.
    * A ``str`` or ``Path`` is treated as a filesystem path: the file is read
      in binary mode and returned as an in-memory ``BytesIO`` whose ``name``
      attribute holds the path as a string.

    Raises:
        TypeError: for any other argument type.
        OSError: if the path cannot be opened or read.
    """
    if hasattr(file, 'read') and callable(file.read):
        return file
    if isinstance(file, (bytes, bytearray)):
        return file
    if isinstance(file, Path):
        file_path = file
    elif isinstance(file, str):
        file_path = Path(file)
    else:
        raise TypeError("Invalid file type. Expected str, Path, or file-like object.")

    # Returning the handle from inside the `with` block handed callers an
    # already-closed file. Read the content while the file is open and return
    # a readable in-memory buffer, so nothing is left open either.
    with open(file_path, 'rb') as thefile:
        buffer = BytesIO(thefile.read())
    buffer.name = str(file_path)
    return buffer
def get_extension(file):
    """
    Return the filename suffix (including the dot) for ``file``.

    ``file`` may be a path string, a ``Path``, or any object exposing a
    ``filename`` attribute. Any failure is logged and then re-raised.
    """
    try:
        if isinstance(file, (str, Path)):
            candidate = Path(file)
        else:
            # Anything else is expected to carry its name in `.filename`.
            candidate = Path(file.filename)
        return candidate.suffix
    except Exception as e:
        ERR(f"Unable to get extension of {file}")
        raise e
def sanitize_filename(text, max_length=255):
    """
    Sanitize a string to be used as a safe filename.

    Every character that is not a word character, whitespace, '.' or '-' is
    removed, surrounding whitespace is stripped, and the result is cut to at
    most ``max_length`` characters. The value is logged before and after.
    """
    DEBUG(f"Filename before sanitization: {text}")
    allowed_only = re.compile(r'[^\w\s\.-]').sub('', text)
    final_filename = allowed_only.strip()[:max_length]
    DEBUG(f"Filename after sanitization: {final_filename}")
    return final_filename
def bool_convert(value: str = Form(None)):
    """
    Interpret a form field as a boolean.

    Returns True when ``value`` is a string equal (case-insensitively) to one
    of "true", "1", "t", "y" or "yes"; returns False for any other string and
    for a missing / non-string value.
    """
    # The declared default is None, which used to raise AttributeError on
    # `.lower()`; treat an absent field as False instead.
    if not isinstance(value, str):
        return False
    return value.lower() in ["true", "1", "t", "y", "yes"]
def str_to_bool(value: str) -> bool:
    """
    Convert a string to a boolean.

    Interprets 'true', '1', 'yes', 'y' (case-insensitive, surrounding
    whitespace ignored) as True.
    Interprets 'false', '0', 'no', 'n', '', or any other string as False.
    A ``bool`` is returned unchanged; any other non-string value is False.
    """
    # The function previously consisted of the docstring alone and therefore
    # always returned None; this implements the documented contract.
    if isinstance(value, bool):
        return value
    if not isinstance(value, str):
        return False
    return value.strip().lower() in ("true", "1", "yes", "y")
def get_timestamp():
    """Return the current time from ``datetime.now()`` as 'YYYY-MM-DD HH:MM:SS'."""
    current = datetime.now()
    return f"{current:%Y-%m-%d %H:%M:%S}"
async def extract_text(file_path: str) -> str:
    """
    Extract text from a file, dispatching on its extension.

    ``.pdf`` files go to ``extract_text_from_pdf`` and ``.docx`` files to
    ``extract_text_from_docx``. The extension check ignores case, so
    ``REPORT.PDF`` is handled like ``report.pdf``.

    Returns:
        The extracted text, or ``None`` for any other extension.
    """
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        return await extract_text_from_pdf(file_path)
    if lowered.endswith('.docx'):
        return await extract_text_from_docx(file_path)
    # Unsupported type: made explicit rather than falling off the end.
    return None
def clean_text(text):
    """
    Tidy extracted text.

    Removes every '-' character, collapses each whitespace run to a single
    space, deletes the characters U+200B..U+200D and U+FEFF, and finally
    strips leading/trailing whitespace - in that order.
    """
    cleaned = text.replace('-', '')
    # Order matters: whitespace is collapsed before the zero-width characters
    # are dropped, so spaces on either side of one are not merged.
    for pattern, replacement in ((r'\s+', ' '), (r'[\u200B-\u200D\uFEFF]', '')):
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
async def ocr_pdf(file_path: str) -> str:
    """
    OCR a PDF and return the recognised text.

    The PDF is converted to images with ``convert_from_path`` and each image
    is passed to ``pytesseract.image_to_string``; both steps run in worker
    threads. The per-image texts are joined with single spaces.

    Returns:
        The joined text, or "" if any step raises (the error is logged).
    """
    try:
        page_images = await asyncio.to_thread(convert_from_path, file_path)
        ocr_jobs = [
            asyncio.to_thread(pytesseract.image_to_string, page_image)
            for page_image in page_images
        ]
        page_texts = await asyncio.gather(*ocr_jobs)
        return ' '.join(page_texts)
    except Exception as e:
        ERR(f"Error during OCR: {str(e)}")
        return ""
async def extract_text_from_pdf(file_path: str) -> str:
    """
    Extract text from a PDF, trying PyPDF2, then pdfminer.six, then OCR.

    A result from PyPDF2 or pdfminer.six is accepted only when
    ``should_use_ocr`` judges it dense enough for the page count; otherwise
    the next method is tried, ending with ``ocr_pdf``.

    NOTE: this module defines ``extract_text_from_pdf`` a second time further
    down; that later definition rebinds the name at import time, so this
    version is not the one callers reach.

    Returns:
        The cleaned text, or "" when the file is not detected as a PDF.
    """
    if not await is_valid_pdf(file_path):
        ERR(f"Invalid PDF file: {file_path}")
        return ""

    num_pages = 0

    # First, attempt to extract text using PyPDF2.
    try:
        reader = await asyncio.to_thread(PdfReader, file_path)
        num_pages = len(reader.pages)
        page_texts = []
        for page in reader.pages:
            # Extract once per page; the call was previously made twice.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text + ' ')
        text = ''.join(page_texts)
        # Return only if text was extracted and is deemed sufficient.
        if text and not should_use_ocr(text, num_pages):
            return clean_text(text)
    except Exception as e:
        ERR(f"Error extracting text with PyPDF2: {str(e)}")

    # If PyPDF2 fails or is insufficient, fall back to pdfminer.six.
    try:
        text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
        # When PyPDF2 failed, the page count is still 0; passing that on made
        # should_use_ocr divide by zero, so treat it as a single page.
        if text_pdfminer and not should_use_ocr(text_pdfminer, max(num_pages, 1)):
            return clean_text(text_pdfminer)
    except Exception as e:
        ERR(f"Error extracting text with pdfminer.six: {e}")

    # Both methods failed or were insufficient: OCR is the last resort.
    INFO("Falling back to OCR for text extraction...")
    return await ocr_pdf(file_path)
async def is_valid_pdf(file_path: str) -> bool:
    """
    Report whether ``filetype`` detects the file at ``file_path`` as a PDF.

    Returns:
        True when the detected MIME type is 'application/pdf'; False when the
        type is something else, cannot be determined, or detection raises
        (that error is logged).
    """
    try:
        kind = filetype.guess(file_path)
    except Exception as e:
        ERR(f"Error checking file type: {e}")
        return False
    # guess() yields None for unrecognised content; handle that directly
    # instead of relying on an AttributeError being caught and logged.
    return kind is not None and kind.mime == 'application/pdf'
async def extract_text_from_pdf(file_path: str) -> str:
    """
    Extract text from a PDF, trying PyPDF2, then pdfminer.six, then OCR.

    Each method's output is accepted as soon as it contains any
    non-whitespace text. OCR renders the pages with ``convert_from_path`` and
    reads them with ``pytesseract``.

    NOTE: an earlier definition of the same name exists in this module; this
    later one replaces it at import time.

    Returns:
        The extracted text (cleaned for the PyPDF2/pdfminer paths, stripped
        for OCR), or "" when the file is not detected as a PDF or OCR fails.
    """
    if not await is_valid_pdf(file_path):
        ERR(f"Invalid PDF file: {file_path}")
        return ""

    try:
        reader = await asyncio.to_thread(PdfReader, file_path)
        page_texts = []
        for page in reader.pages:
            # Extract once per page; the call was previously made twice.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text + ' ')
        text = ''.join(page_texts)
        if text.strip():  # Successfully extracted text
            return clean_text(text)
    except Exception as e:
        ERR(f"Error extracting text with PyPDF2: {str(e)}")

    try:
        text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
        if text_pdfminer.strip():  # Successfully extracted text
            return clean_text(text_pdfminer)
    except Exception as e:
        ERR(f"Error extracting text with pdfminer.six: {str(e)}")

    # Fall back to OCR
    INFO("Falling back to OCR for text extraction...")
    try:
        # Rendering pages is slow, blocking work; run it in a worker thread
        # like the other steps so the event loop stays responsive.
        images = await asyncio.to_thread(convert_from_path, file_path)
        ocr_texts = await asyncio.gather(
            *(asyncio.to_thread(pytesseract.image_to_string, img) for img in images)
        )
        return ' '.join(ocr_texts).strip()
    except Exception as e:
        ERR(f"OCR failed: {str(e)}")
        return ""
async def extract_text_from_docx(file_path: str) -> str:
    """
    Return the text of a .docx file, one paragraph per line.

    The document is opened and read in a worker thread; paragraph texts are
    joined with '\\n'.
    """
    def _collect_paragraphs(docx_path):
        document = Document(docx_path)
        return '\n'.join(para.text for para in document.paragraphs)

    return await asyncio.to_thread(_collect_paragraphs, file_path)
# Asynchronous entry point; the blocking read itself lives in the helper below.
async def read_text_file(file_path: str) -> str:
    """Read a UTF-8 text file without blocking the event loop (runs in a worker thread)."""
    contents = await asyncio.to_thread(_sync_read_text_file, file_path)
    return contents


def _sync_read_text_file(file_path: str) -> str:
    """Synchronously read and return the whole file decoded as UTF-8."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents
def should_use_ocr(text, num_pages) -> bool:
    """
    Decide whether extracted text is too sparse and OCR should be used.

    Args:
        text: Text extracted so far (may be empty or None).
        num_pages: Number of pages the text came from. A missing, zero or
            negative count is treated as one page.

    Returns:
        True when there is no text or fewer than 10 words per page on average.
    """
    if not text:
        return True  # No text was extracted, use OCR
    # Guard against an unknown page count, which used to raise
    # ZeroDivisionError here.
    pages = num_pages if num_pages and num_pages > 0 else 1
    word_count = len(text.split())
    avg_words_per_page = word_count / pages
    return avg_words_per_page < 10
def convert_to_unix_time(iso_date_str):
    """Parse a date string with dateutil and return its timestamp truncated to an int."""
    # dateutil picks up a timezone offset when the string contains one.
    parsed = parser.parse(iso_date_str)
    epoch_seconds = parsed.timestamp()
    return int(epoch_seconds)
async def get_db_connection():
    """Open and return a new asyncpg connection built from the DB, DB_USER, DB_PASS, DB_HOST and DB_PORT settings."""
    connect_kwargs = {
        "database": DB,
        "user": DB_USER,
        "password": DB_PASS,
        "host": DB_HOST,
        "port": DB_PORT,
    }
    return await asyncpg.connect(**connect_kwargs)
# Disabled code parked in a string literal: a synchronous psycopg2 variant of
# the connection helper with an optional SSHTunnelForwarder. Because it is a
# plain string assigned to `temp`, none of it is executed. The names it
# mentions (DB_SSH, DB_SSH_USER, DB_SSH_PASS, psycopg2) are not among the
# imports visible at the top of this module.
temp = """
def get_db_connection_ssh(ssh: bool = True):
if ssh:
with SSHTunnelForwarder(
(DB_SSH, 22),
DB_SSH_USER=DB_SSH_USER,
DB_SSH_PASS=DB_SSH_PASS,
remote_bind_address=DB_SSH,
local_bind_address=(DB_HOST, DB_PORT)
) as tunnel: conn = psycopg2.connect(
dbname=DB,
user=DB_USER,
password=DB_PASS,
host=DB_HOST,
port=DB_PORT
)
else:
conn = psycopg2.connect(
dbname=DB,
user=DB_USER,
password=DB_PASS,
host=DB_HOST,
port=DB_PORT
)
return conn
"""
def db_localized():
    """
    Return the result of calling ``get_db_connection()``.

    ``get_db_connection`` is a coroutine function and this wrapper does not
    await it, so the caller receives a coroutine that must be awaited to
    obtain the connection.
    """
    # A switch between SSH and direct access was sketched here but is disabled:
    # ssh = True if TS_IP == DB_SSH else False
    pending_connection = get_db_connection()
    return pending_connection
def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great-circle distance between two points on the earth.

    Args:
        lat1, lon1: Latitude and longitude of the first point, decimal degrees.
        lat2, lon2: Latitude and longitude of the second point, decimal degrees.

    Returns:
        The distance in kilometres, using an Earth radius of 6371 km.
    """
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt()/asin() raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * math.asin(math.sqrt(a))
    r = 6371  # Radius of Earth in kilometers
    return c * r
def convert_degrees_to_cardinal(d):
    """
    Convert a bearing in degrees to one of 16 compass-point abbreviations.

    The bearing is divided by the sector width (360 / 16), rounded with
    ``round`` and wrapped modulo 16, so 360 maps back to "N".
    """
    compass_points = (
        "N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE",
        "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW",
    )
    sector_count = len(compass_points)
    sector_width = 360. / sector_count
    sector_index = round(d / sector_width) % sector_count
    return compass_points[sector_index]
def localize_dt(dt):
    """
    Return ``dt`` as a timezone-aware datetime.

    A string is first parsed with dateutil. A naive datetime gets ``TZ``
    attached via ``replace(tzinfo=TZ)`` (the clock fields are left as they
    are); an already aware datetime is returned unchanged.

    Raises:
        TypeError: when the input is neither a string nor a datetime, or when
            parsing fails. The underlying error is logged first.
    """
    original_input = dt
    try:
        if isinstance(dt, str):
            dt = dateutil_parse(dt)
            DEBUG(f"{original_input} was a string so we attempted converting to datetime. Result: {dt}")

        if not isinstance(dt, datetime):
            ERR(f"Conversion failed")
            raise TypeError("Conversion failed")

        DEBUG(f"{dt} is a datetime object, so we will ensure it is tz-aware.")
        # Only naive values are touched; aware ones pass straight through.
        return dt if dt.tzinfo is not None else dt.replace(tzinfo=TZ)
    except Exception as e:
        ERR(f"Error parsing datetime: {e}")
        raise TypeError("Input must be a string or datetime object")
# Maps the 12-hour column labels (every second hour) to "HH:MM:SS" times.
HOURLY_COLUMNS_MAPPING = {
    "12am": "00:00:00",
    "2am": "02:00:00",
    "4am": "04:00:00",
    "6am": "06:00:00",
    "8am": "08:00:00",
    "10am": "10:00:00",
    "12pm": "12:00:00",
    "2pm": "14:00:00",
    "4pm": "16:00:00",
    "6pm": "18:00:00",
    "8pm": "20:00:00",
    "10pm": "22:00:00",
}


def convert_to_12_hour_format(datetime_obj_or_str):
    """
    Translate a time into its 12-hour label from HOURLY_COLUMNS_MAPPING.

    Args:
        datetime_obj_or_str: A ``datetime``, a ``time``, or a string in
            "%Y-%m-%d %H:%M:%S" or "%H:%M:%S" format.

    Returns:
        The matching label (e.g. "2pm"); "Invalid time" when the time of day
        is not one of the mapped hours; "Invalid datetime string format" when
        a string matches neither accepted format.
    """
    if isinstance(datetime_obj_or_str, str):
        for fmt in ("%Y-%m-%d %H:%M:%S", "%H:%M:%S"):
            try:
                parsed = datetime.strptime(datetime_obj_or_str, fmt)
                break
            except ValueError:
                continue
        else:
            return "Invalid datetime string format"
        # Look up the parsed time of day. The raw string used to be looked up
        # instead, so full "YYYY-MM-DD HH:MM:SS" strings never matched.
        time24 = parsed.strftime("%H:%M:%S")
    else:
        # datetime and time objects both provide strftime.
        time24 = datetime_obj_or_str.strftime("%H:%M:%S")

    reverse_mapping = {v: k for k, v in HOURLY_COLUMNS_MAPPING.items()}
    return reverse_mapping.get(time24, "Invalid time")
def encode_image_to_base64(image_path):
    """
    Re-encode an image file as JPEG and return it as a base64 string.

    Args:
        image_path: Path of the image to load.

    Returns:
        The base64 text of the JPEG bytes, or ``None`` (after a debug log)
        when no file exists at ``image_path``.
    """
    if not os.path.exists(image_path):
        DEBUG(f"Error: File does not exist at {image_path}")
        return None

    with Image.open(image_path) as image:
        # The JPEG writer rejects modes such as RGBA, LA and P; convert those
        # to RGB rather than letting save() raise. Modes it can already write
        # are left untouched.
        jpeg_ready = image
        if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            jpeg_ready = image.convert("RGB")
        output_buffer = BytesIO()
        jpeg_ready.save(output_buffer, format='JPEG')

    byte_data = output_buffer.getvalue()
    return base64.b64encode(byte_data).decode('utf-8')
def resize_and_convert_image(image_path, max_size=2160, quality=80):
    """
    Scale an image so its longest side equals ``max_size`` and return JPEG bytes.

    Args:
        image_path: Path of the image to load.
        max_size: Target length in pixels for the longest side.
        quality: JPEG quality passed to the encoder.

    Returns:
        The JPEG-encoded image as ``bytes``.
    """
    with Image.open(image_path) as img:
        # NOTE(review): the ratio is applied unconditionally, so images
        # smaller than max_size are enlarged - confirm that is intended.
        ratio = max_size / max(img.size)
        # Keep every dimension at least 1 px; an extreme aspect ratio could
        # otherwise truncate the short side to 0 and make resize() fail.
        new_size = tuple(max(1, int(dim * ratio)) for dim in img.size)
        resized = img.resize(new_size, Image.Resampling.LANCZOS)

        # The JPEG writer rejects modes such as RGBA, LA and P; convert those
        # to RGB rather than letting save() raise.
        if resized.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            resized = resized.convert("RGB")

        img_byte_arr = io.BytesIO()
        resized.save(img_byte_arr, format='JPEG', quality=quality)

    return img_byte_arr.getvalue()