Initial commit

This commit is contained in:
sanj 2024-06-23 14:51:45 -07:00
commit 0fdd15fc72
20 changed files with 5606 additions and 0 deletions

56
.gitignore vendored Normal file
View file

@ -0,0 +1,56 @@
# Ignore specific data files and directories
sijapi/data/calendar.ics
sijapi/data/asr/
sijapi/data/geocoder/
sijapi/data/courtlistener/
sijapi/data/tts/
sijapi/data/db/
sijapi/data/sd/workflows/private
sijapi/data/*.pbf
sijapi/data/geonames.txt
sijapi/data/sd/images/
sijapi/config/O365/
sijapi/local_only/
sijapi/testbed/
**/.env
**/.config.yaml
**/*.log
**/logs/
**/__pycache__
**/.DS_Store
**/*.ics
**/*.sqlite
**/private/
**/*sync-conflict*.*
**/*.db
**/*.mp3
**/*.mp4
**/*.wav
**/*.pyc
**/.ipynb_checkpoints/
venv/
env/
.venv/
.vscode/
.idea/
*~
*.swp
*.swo
*.com
*.class
*.dll
*.exe
*.o
*.so
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
ehthumbs.db
Thumbs.db
sijapi/testbed/

17
README.md Normal file
View file

@ -0,0 +1,17 @@
```
.x+=:. . .
z` ^% @88> .. @88>
. <k %8P 888> .d`` %8P
.@8Ned8" . "8P u @8Ne. .u .
.@^%8888" .@88u . us888u. %8888:u@88N .@88u
x88: `)8b. ''888E` u888u. .@88 "8888" `888I 888. ''888E`
8888N=*8888 888E `'888E 9888 9888 888I 888I 888E
%8" R88 888E 888E 9888 9888 888I 888I 888E
@8Wou 9% 888E 888E 9888 9888 uW888L 888' 888E
.888888P` 888& 888E 9888 9888 '*88888Nu88P 888&
` ^"F R888" 888E "888*""888"~ '88888F` R888"
"" 888E ^Y" ^Y' 888 ^ ""
888E *8E
888P '8>
.J88" "
```

47
requirements.txt Normal file
View file

@ -0,0 +1,47 @@
python-dotenv
setuptools
PyPDF2
fastapi
pdf2image
pdfminer
pytesseract
python-dateutil
python-docx
hypercorn
starlette
httpx
pydantic
pytz
requests
aiohttp
paramiko
tailscale
pandas
pydub
torch
selenium
webdriver_manager
faster_whisper
filetype
html2text
markdown
ollama
aiofiles
bs4
imbox
newspaper3k
python-magic
urllib3
whisper
huggingface_hub
numpy
tqdm
tiktoken
numba
scipy
vectordb
IPython
torchaudio
lxml
lxml_html_clean
pdfminer.six

61
setup.py Normal file
View file

@ -0,0 +1,61 @@
from setuptools import setup, find_packages

# Runtime dependencies; keep this list in sync with requirements.txt.
INSTALL_REQUIRES = [
    'fastapi',
    'python-dotenv',
    'hypercorn',
    'setuptools',
    'PyPDF2',
    'pdf2image',
    'pdfminer',
    'pytesseract',
    'python-dateutil',
    'python-docx',
    'starlette',
    'httpx',
    'pydantic',
    'pytz',
    'requests',
    'aiohttp',
    'paramiko',
    'tailscale',
    'pandas',
    'pydub',
    'torch',
    'selenium',
    'webdriver_manager',
    'faster_whisper',
    'filetype',
    'html2text',
    'markdown',
    'ollama',
    'aiofiles',
    'bs4',
    'pdfminer.six',
    'lxml_html_clean',
    'imbox',
    'newspaper3k',
    'python-magic',
    'urllib3',
    'whisper',
    'huggingface_hub',
    'numpy',
    'tqdm',
    'tiktoken',
    'numba',
    'scipy',
    'vectordb',
    'IPython',
    'torchaudio'
]

setup(
    name='sijapi',
    version='0.1',
    packages=find_packages(),
    # Install a `sijapi` console command that invokes sijapi.__main__:main.
    entry_points={
        'console_scripts': [
            'sijapi = sijapi.__main__:main',
        ],
    },
    install_requires=INSTALL_REQUIRES,
)

92
sij.asc Normal file
View file

@ -0,0 +1,92 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
mQINBGY+fL4BEADCpz8FAfa6/7i9mEQCYlwwP2k9DlrUzz+u9BL4BmuoTEcGty9M
7EA2ivRxXo371IIMjL/GyAa8I3WHMEhxuRlGldUQaHzo6PicTn+OiLJ/g2vCfStN
jIYog3WC25P7Es1n1hDuOu8rUL93twXZ4NevgYx+G44M7Q+/1AbSXf83kpawlHhg
HcGmH2vt9UulfTGAvN9s2sH2pn89812lpWLSdPARNw09ePZy4RdiEgJ6t+S+wjaE
Ue/H4FcQC1MLrQnkW5soUOduY9HN0iUk/xZqqkRQctl3ds5oInE483vQsL0HKFvs
MB8lBdXTbVzxvpFe+fvT8d6hiZ/YgxIUEl1KZLDd3atqj+UREuG+LABZUKC4nSUP
EXneXUqi4qVCW9827K9/H+IKahe8OE+OrZAsSfLtsp4AznIxgyQbvpUZzCuRASJN
Kt1cjcJBOv5L0HJ8tVykZd23WuKUXiyxTs1MxsDGyjew30IsAg4WNO/iw9vBO/Yu
pfjlZTcgbghdIuNmOrnCyzKWtUxxfDtWwEBBshbTKusOaGhauBaHrRVE7lKlTblM
x1JIzYBziDmFy25J1XvYb3guilk1yy54poLQaEcE54mQYWHKCNS4eQeL5dJR3Nmu
Pt9GXdMyNO3uyog3WYpyYqch+osbBsHFVNUClxMycnyqZzHQeZHPNJBzJwARAQAB
tC5TYW5neWUgSW5jZS1Kb2hhbm5zZW4gKEF0dG9ybmV5KSA8c2lqQHNpai5sYXc+
iQJXBBMBCABBAhsDBQkHhh8tBQsJCAcCBhUKCQgLAgQWAgMBAh4BAheAFiEEMjqK
LEezdiJLNhO3U1smWu2+W0QFAmY+fPUCGQEACgkQU1smWu2+W0SwBQ/+L5S1fIop
6iQ/6gQENBNCUVgACWP0/ViJzQGo4iF3UZkV5KV8pgk/TenZSXCLxUj6UpSAe25m
vtrGV4NCL2hLn1NPK11Na6IM1ykfh/L67NKeCqmtQYwNLwW0o0fvUpK9fahPxhmv
EFo+lVCabQndgzmLxnUhxH4qkGSejsaSFoJQ6fVl/DExCL4w/R5rStnRMKDtkuF1
ONfjZpuLrAylx8Ypf/rocQYn5AJcRD5ZL2bGgDZNe85VNBFmD3b2cGSVpm3J6Rg/
fPfs1lgtpgXWbBDCF8nRY326Utbr3qoeZUXVQjVZ05Q2SpUYFHiDZJ3EFwQikg5n
cIBfcXQZQhTq/OK0eS0vB1li8m1ce9m8iMC+Pxe5toPkxFV5RO1+o5PG1SyOfzfV
F1c0O9JQqdJzRHoTuqLtVhlmRVBU2d6TjWYlZ6TwPShSTLu0Tkm4EeFJS4oag75d
q7LlIIvrWS4n3CqVpC/PEIUtclytkOkvNQaSWHEVkappS3UjkX1BJmaI8zXYh9jh
sV/5FckvwYnky+w6geFOBs34NW0rg9oNw4KNAywYcOPbI/Ev1z57my+MpA5msw+B
ww9sFC+tzQCSJl0FU2Dg2YMnyqfUtGr9HfXdAGuuUVh+cYFmEdwwZqBWl37pNIGL
SxfF1AdrlHCSpJcLVETe80UraMFAI7tyOwe0L1Nhbmd5ZSBJbmNlLUpvaGFubnNl
biA8c2FuZ3llaWpAd2VzdGVybmxhdy5vcmc+iQJUBBMBCAA+FiEEMjqKLEezdiJL
NhO3U1smWu2+W0QFAmY+fOgCGwMFCQeGHy0FCwkIBwIGFQoJCAsCBBYCAwECHgEC
F4AACgkQU1smWu2+W0RlnBAArwaFta9NTRdubTqctv1EET1D9OXAE/R5vdSk2jRQ
1CMYmv6KeMm0Rl7+dNFet/vJOEtITF7TZHnt7WBy7n5m+SIoARsaZYEchjZKsE2g
6RvRWqFGYuUYQWTRKsw0b2tT16BaNLKdV/w3ndRQNS6wDJrW1dRnIWxm4z26d3/H
Rt3o8+LUVxdSWGLliKZU00S+FNPVSwWe/X7+CoIE7T5XZL+OIEJ6DfpK2pkHKT6D
FswF3KOLG36vz5eISk4AT+o9AEoFIpX0hce3DMixEYQSgKN230K8RchC59bO81zE
w7Mic4vpn/wKFhicn+0BA1aJzzOd8iEwiA0p5baq4b2xIwCBiO4uv/HXR1SN1Tfk
QozjAGzl8LzrmwGTWOtOSk/7ckPhPR2MGNhMdtJ7rPeHxImJLh+/f4uBmYnQUdw4
0j3sMpJmrShW5dXJ8YHqVFfqabYD8HkBztdYI0qGJDpQjEbW6V+DvMWQXOZ8c1ul
NN2vZyY25RkypMQLiphImJa+q6eGtBEas40MeAkgQKIBPBBpb6W1km+m6UnOADKB
0/vOWcZMgijyMPp7WvwXbOwmXI27rHsUTvhFDLPI113a9I5bU8j6VyW2s/sst3Xc
OQDzEgR3KvD4dWjczIg6yliIq9eM5hskpsYyfDfWRWrIbR3Tg8XPwnQRB9dPEHIy
rKS0KVNhbmd5ZSBJbmNlLUpvaGFubnNlbiA8c2FuZ3llQHJpc2V1cC5uZXQ+iQJU
BBMBCAA+FiEEMjqKLEezdiJLNhO3U1smWu2+W0QFAmY+fQYCGwMFCQeGHy0FCwkI
BwIGFQoJCAsCBBYCAwECHgECF4AACgkQU1smWu2+W0SKGA//VRGpS7IwOOlHF7OI
+LEMDebLpLB2PswfWrK+sI9YdXXV/CaT0NcCz2HPCoK+coaDkl4cxh10ykVbjO36
wZc/rvhpzga2wMLpBLNwpTvSlfMwsCQeRQay498bgdR59gf1hYa/dPYKKrBgNxHa
Kc3dMDWU0adpV4zV1s/iFNQQZfmhUah+8TTlB03hahPzn8V7CqQF+jTfSXiWPv/V
eD1W6Sc1juvLTVxTThbM5ewiIhMP2t7KM+M4viOEqce79IcE2HTcpCaEI7Lh/Eld
9VBZZk/gENuPqyQuLbOIOQhC6LYRZkZC9Vv1FDutfWV5ZBPyaTY/n5pGW3lo+Tfa
FLSamQcD6dyiGm/ZyQbPUDt2aWhqRGr7VvvtfyXLazL9T9Y6ASr5UjLakPr5ihUz
B8InRch9ACPbu7QSIGFk9PQgHme2Cd/HMRLIALnkAmrafgDE+14Rlp9qI2nYhWdD
jkZcLalPXQCDBxUfj1q192Nn3wlKsDkDd2RWT7Mc2RJq2FR36KADPMtz2oJPSib4
eRgI40E9Wv+zqHDDTU2K/bLi3nmBHvKnXWXPyiBPVL+CAoAhkYHHJwNuRQfxlukq
heS4/CMBRB04foTeu2ltl6/sQdAIyBGKbOC6fMyhJFYbi16nWI6j7iw2XQnqyitu
jC8Pz14NfIAQTpKCVcV32Kn2k1+0I1Nhbmd5ZSBJbmNlLUpvaGFubnNlbiA8c2lq
QGVudi5lc3E+iQJUBBMBCAA+FiEEMjqKLEezdiJLNhO3U1smWu2+W0QFAmY+fRIC
GwMFCQeGHy0FCwkIBwIGFQoJCAsCBBYCAwECHgECF4AACgkQU1smWu2+W0Rbxw/+
OMYnlyXvo146+3M6JGdvW36CWmc9ZcmaU+xJM3FnG91WNo5J8MnHl0Ks9BwjNWtm
VJgFEdi2EVpSLJnYdQyJILCNt8RAclYvbFHYUOIDEEC2yr5ZKt/odwYAXPxaqQ4O
Sj7R2GbLA52O8zGWfARBAnAQycrlBRjItdpzGeWgRST8O/ot/IkU7xsAKW72E2VB
9jlCahp5c01lEideVqzVhk3z6GzVz1NUKsglgEOmTIjld4mMs+4GX/93q0u1erKO
I7Q6RL6lfdc2opGi5jFMXGWhLLgX2SSsBFJRuSQGnTpbx3XWFS5uA+cku7Fh0fC0
MKr2vsY18Z6OqU0MdQm6ovIVcvhzIdGfnBU9Ct98DMiUhDCmx3o9XneWj1n7kWKM
gT8s8AvE27tidtkZApwIKHdUy6qfyqwRjxE+KdL6Eh48x3TVYep+wfSfPJ1eq9Ne
7WWXKUx6FGNH01hpQdTLbCYqmwMa03och1wwyi+0wc8rHe6k6y2tURtP3mINkDeV
u1QmVaGRDA2r7oDm9UsFeupGsbFBnTkQIfJgnrLRJFfN2FDJPZDcd/VS71AOSL5C
jY+Dr/WHYPWeN8MHXfG4r/P41wsrnAJEAzSvLRQ9GYCLPe825W+uDJx9eMePodFa
BeIBcM633WXpbIXHnRQhPDfTzejCejO6GoPE7PbtBBi5Ag0EZj58vgEQAPUqNOgQ
kAPd/S+nkGAfvnzC5UD6lVWaQTVL/xU2K1J8l11K5Ck4nq7oMKhzEitu0lA+jG7q
JVwXMj9+rnoGlbIYmmxCZYpSit930Mss3HjYU8IAF4nybGwc5+wO77cldk3WJTI0
EkFgiM4Jk6Gk/tRf1LgMIfJIUgm8MooPLqg2z5Pj+bbwxw42A20enEwtF3ivEETJ
wuJwsp5uCOAfzOGqqBvp19PMTPynUBuwEXCkJfb0CCz+5yhjoi6ZjCVXxjuoe2wN
jFwoYd8odfSuvC6Fh9qqXnjF7HZLxEyN7K1L/y/sWarsN01zbUUI3kZlnTuamDu4
LdZtl2q3QqDyxmzHIWLTa1qL0s3WooB7JJqBYaNmQjLHadoktZ4vfhl7kjXYsg+i
84oipL83u2cRHplpqnRk9qVwNdW01EObjNafWY6t3942sM4e/yOdQiaXlxivPuHV
VYwme6K53lmGcV3ipMWRpNkme+oKV/TdYTTdlDaLgC8ga5AW6poNoSp5UpNeOs0E
mxIZivpRQSCr3g+jScy0RdX/+tI1gWe+2ZIHFwR+1WsXvLXHyd1wVyH4vDxSf1bE
VRVsXLZDT/xMGDzNzAC76kzoIykrcndFiTbNzB/LjZJuls6fRdN07bTcymWEKYiP
Ia6iGdag6+ueoX4eDzbjCvldKtkfr/EhB7MfABEBAAGJAjwEGAEIACYWIQQyOoos
R7N2Iks2E7dTWyZa7b5bRAUCZj58vgIbDAUJB4YfLQAKCRBTWyZa7b5bRLZdEACk
AaXNVeywC9+X6bdwkKV5Jl6Hv238cGd58TuVbjd+tii1JazbKEqCAr5tTlGtrUZg
fyjM0z5sMKDSZ15paX4xDbDs+xdfMxLVdjmFlZgwTrrTSIx3ODxPo/sSeyrzGZrQ
hlZjOHP1Bvln0OTQwK0yE3Eaip0FhIpJA5FX3yrZfvza3St5leNOXsZgEri68cgf
mVhS9tBD2I9TpCVwgq5vRnloAMgtQBYr8N9glXBfs2WsPhU96HSSH88osJW+lCkG
vTtzQBEjnnSQ/ssHBYz4DfpsJe1fbM+9WVow6q2nkUhqg5TfdAt4H0ra2uPXnNz8
lvQObVHlw7T0w5UTzgBdlCyYplyTG2gcZi+UWzit6YH9DH82j1otcq3+3NlrKwo0
TSJKZNagiqgJNZ1mhJQTt3JDacFFkBBxLf6trruuyInRU1leo87hzHCxIlMbQPqh
ogtV+W9FHElVJwoTQi8YF+0AacZPzK8wJmlPLxBeqs+ULJ8H5wZxlEBB1Jj91/W9
6R8m2IUZCsXNNpYU+f7uB8x0RUS3pU8S7GcwdJmOa16Xc4VdfWugm4TTEtajeSYC
ek5j/2s/QkAum5slT2Y6Aam0Jj/IhsGHKVEnR6DS01mZqVeeu0giPFUO4ZX5C0n9
mAmw/ZUGIOj6ls3KMBHv4pqQI7nd00tW8eIMgKGgKQ==
=PhPl
-----END PGP PUBLIC KEY BLOCK-----

251
sijapi/__init__.py Normal file
View file

@ -0,0 +1,251 @@
import os
import json
from pathlib import Path
import ipaddress
import multiprocessing
from dotenv import load_dotenv
from dateutil import tz
from pathlib import Path
from pydantic import BaseModel
import traceback
import logging
from .logs import Logger
# from sijapi.config.config import load_config
# cfg = load_config()
### Initial initialization
# Package-relative paths, resolved once at import time.
BASE_DIR = Path(__file__).resolve().parent
CONFIG_DIR = BASE_DIR / "config"
ENV_PATH = CONFIG_DIR / ".env"
LOGS_DIR = BASE_DIR / "logs"
# Create logger instance
package_logger = Logger(__name__, LOGS_DIR)
LOGGER = package_logger.get_logger()
# Thin module-level logging helpers so routers can `from sijapi import DEBUG, ...`.
def DEBUG(log_message): LOGGER.debug(log_message)
def INFO(log_message): LOGGER.info(log_message)
def WARN(log_message): LOGGER.warning(log_message)
def ERR(log_message):
    # Also logs the current traceback; when called outside an `except`
    # block this emits "NoneType: None" — intended for exception handlers.
    LOGGER.error(log_message)
    LOGGER.error(traceback.format_exc())
def CRITICAL(log_message):
    # Same traceback caveat as ERR directly above.
    LOGGER.critical(log_message)
    LOGGER.critical(traceback.format_exc())
# NOTE(review): LOGS_DIR is created only here, *after* Logger(...) was built —
# confirm Logger does not try to open files in a missing directory at init.
os.makedirs(LOGS_DIR, exist_ok=True)
load_dotenv(ENV_PATH)
### API essentials
# Comma-separated router module names to load (consumed by __main__.load_router).
ROUTERS = os.getenv('ROUTERS', '').split(',')
# URL paths reachable without an API key (checked by the auth middleware).
PUBLIC_SERVICES = os.getenv('PUBLIC_SERVICES', '').split(',')
GLOBAL_API_KEY = os.getenv("GLOBAL_API_KEY")
# HOST_NET and HOST_PORT comprise HOST, which is what the server will bind to
HOST_NET = os.getenv("HOST_NET", "127.0.0.1")
HOST_PORT = int(os.getenv("HOST_PORT", 4444))
HOST = f"{HOST_NET}:{HOST_PORT}"
BASE_URL = os.getenv("BASE_URL", f"http://{HOST}")
# NOTE(review): this list mixes ipaddress objects with the literal string
# 'localhost' — membership tests against parsed client IPs will never match
# the string entry. Confirm whether 'localhost' should be resolved instead.
LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
# Requests originating from these networks bypass the API-key check.
TRUSTED_SUBNETS = [ipaddress.ip_network(subnet.strip()) for subnet in os.getenv('TRUSTED_SUBNETS', '127.0.0.1/32').split(',')]
# Default to half the available cores, capped at the physical count.
MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count())
### Directories & general paths
HOME_DIR = Path.home()
ROUTER_DIR = BASE_DIR / "routers"
DATA_DIR = BASE_DIR / "data"
os.makedirs(DATA_DIR, exist_ok=True)
ALERTS_DIR = DATA_DIR / "alerts"
os.makedirs(ALERTS_DIR, exist_ok=True)
REQUESTS_DIR = LOGS_DIR / "requests"
os.makedirs(REQUESTS_DIR, exist_ok=True)
REQUESTS_LOG_PATH = LOGS_DIR / "requests.log"
### Databases
# PostgreSQL connection settings; every value can be overridden via env.
DB = os.getenv("DB", 'sijdb')
DB_HOST = os.getenv("DB_HOST", "127.0.0.1")
# Normalized to int (the original left env-supplied values as strings while
# the default was an int), matching how HOST_PORT is handled above.
DB_PORT = int(os.getenv("DB_PORT", 5432))
DB_USER = os.getenv("DB_USER", 'sij')
DB_PASS = os.getenv("DB_PASS")
DB_SSH = os.getenv("DB_SSH", "100.64.64.15")
DB_SSH_USER = os.getenv("DB_SSH_USER")
# BUG FIX: the original read env var "DB_SSH_ENV" for the SSH password,
# which looks like a typo. Read the correctly-named DB_SSH_PASS first and
# fall back to the legacy name so existing deployments keep working.
DB_SSH_PASS = os.getenv("DB_SSH_PASS") or os.getenv("DB_SSH_ENV")
DB_URL = f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB}'
### LOCATE AND WEATHER LOCALIZATIONS
USER_FULLNAME = os.getenv('USER_FULLNAME')
USER_BIO = os.getenv('USER_BIO')
# tz.gettz returns None for an unrecognized zone name, so TZ may be None.
TZ = tz.gettz(os.getenv("TZ", "America/Los_Angeles"))
HOME_ZIP = os.getenv("HOME_ZIP") # unimplemented
LOCATION_OVERRIDES = DATA_DIR / "loc_overrides.json"
LOCATIONS_CSV = DATA_DIR / "US.csv"
# DB = DATA_DIR / "weatherlocate.db" # deprecated
VISUALCROSSING_BASE_URL = os.getenv("VISUALCROSSING_BASE_URL", "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline")
VISUALCROSSING_API_KEY = os.getenv("VISUALCROSSING_API_KEY")
### Obsidian & notes
OBSIDIAN_VAULT_DIR = Path(os.getenv("OBSIDIAN_BASE_DIR") or HOME_DIR / "Nextcloud" / "notes")
OBSIDIAN_JOURNAL_DIR = OBSIDIAN_VAULT_DIR / "journal"
# These two are vault-relative *strings*, unlike the Path objects above.
OBSIDIAN_RESOURCES_DIR = "obsidian/resources"
OBSIDIAN_BANNER_DIR = f"{OBSIDIAN_RESOURCES_DIR}/banners"
os.makedirs(Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_BANNER_DIR, exist_ok=True)
OBSIDIAN_BANNER_SCENE = os.getenv("OBSIDIAN_BANNER_SCENE", "wallpaper")
OBSIDIAN_CHROMADB_COLLECTION = os.getenv("OBSIDIAN_CHROMADB_COLLECTION", "obsidian")
DOC_DIR = DATA_DIR / "docs"
os.makedirs(DOC_DIR, exist_ok=True)
### DATETIME SCHEMA FOR DAILY NOTE FOLDER HIERARCHY FORMATTING ###
YEAR_FMT = os.getenv("YEAR_FMT")
MONTH_FMT = os.getenv("MONTH_FMT")
DAY_FMT = os.getenv("DAY_FMT")
DAY_SHORT_FMT = os.getenv("DAY_SHORT_FMT")
### Large language model
LLM_URL = os.getenv("LLM_URL", "http://localhost:11434")
# NOTE(review): reads env var "SYSTEM_MSG", not "LLM_SYS_MSG" — confirm intended.
LLM_SYS_MSG = os.getenv("SYSTEM_MSG", "You are a helpful AI assistant.")
# NOTE(review): this SUMMARY_INSTRUCT is silently overwritten by the later
# assignment under "### Summarization" below; only that later default can
# ever take effect when the env var is unset.
SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
DEFAULT_LLM = os.getenv("DEFAULT_LLM", "dolphin-mistral")
DEFAULT_VISION = os.getenv("DEFAULT_VISION", "llava")
DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "Luna")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
### Stable diffusion
SD_IMAGE_DIR = DATA_DIR / "sd" / "images"
os.makedirs(SD_IMAGE_DIR, exist_ok=True)
SD_WORKFLOWS_DIR = DATA_DIR / "sd" / "workflows"
os.makedirs(SD_WORKFLOWS_DIR, exist_ok=True)
COMFYUI_URL = os.getenv('COMFYUI_URL', "http://localhost:8188")
# Path(None) raises TypeError when COMFYUI_DIR is unset — assumes it is
# always configured. TODO confirm.
COMFYUI_DIR = Path(os.getenv('COMFYUI_DIR'))
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output'
COMFYUI_LAUNCH_CMD = os.getenv('COMFYUI_LAUNCH_CMD', 'mamba activate comfyui && python main.py')
SD_CONFIG_PATH = CONFIG_DIR / 'sd.json'
# Loaded eagerly; a missing or invalid sd.json aborts package import.
with open(SD_CONFIG_PATH, 'r') as SD_CONFIG_file:
    SD_CONFIG = json.load(SD_CONFIG_file)
### Summarization
SUMMARY_CHUNK_SIZE = int(os.getenv("SUMMARY_CHUNK_SIZE", 4000)) # measured in tokens
SUMMARY_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", 100)) # measured in tokens
SUMMARY_TPW = float(os.getenv("SUMMARY_TPW", 1.3)) # measured in tokens
SUMMARY_LENGTH_RATIO = int(os.getenv("SUMMARY_LENGTH_RATIO", 4)) # measured as original to length ratio
SUMMARY_MIN_LENGTH = int(os.getenv("SUMMARY_MIN_LENGTH", 150)) # measured in tokens
SUMMARY_INSTRUCT = os.getenv("SUMMARY_INSTRUCT", "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
SUMMARY_MODEL = os.getenv("SUMMARY_MODEL", "llama3")
SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 4096))
### ASR
ASR_DIR = DATA_DIR / "asr"
os.makedirs(ASR_DIR, exist_ok=True)
# NOTE(review): str(None) yields the literal path component "None" when
# WHISPER_CPP_DIR is unset — confirm whether a real default is intended.
WHISPER_CPP_DIR = HOME_DIR / str(os.getenv("WHISPER_CPP_DIR"))
WHISPER_CPP_MODELS = os.getenv('WHISPER_CPP_MODELS', 'NULL,VOID').split(',')
### TTS
PREFERRED_TTS = os.getenv("PREFERRED_TTS", "None")
TTS_DIR = DATA_DIR / "tts"
os.makedirs(TTS_DIR, exist_ok=True)
VOICE_DIR = TTS_DIR / 'voices'
os.makedirs(VOICE_DIR, exist_ok=True)
PODCAST_DIR = TTS_DIR / "sideloads"
os.makedirs(PODCAST_DIR, exist_ok=True)
TTS_OUTPUT_DIR = TTS_DIR / 'outputs'
os.makedirs(TTS_OUTPUT_DIR, exist_ok=True)
TTS_SEGMENTS_DIR = TTS_DIR / 'segments'
os.makedirs(TTS_SEGMENTS_DIR, exist_ok=True)
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
### Calendar & email account
# Env vars are strings, so compare against "True" directly; the original
# `True if ... == "True" else False` wrappers were redundant.
MS365_TOGGLE = os.getenv("MS365_TOGGLE") == "True"
ICAL_TOGGLE = os.getenv("ICAL_TOGGLE") == "True"
ICS_PATH = DATA_DIR / 'calendar.ics' # deprecated now, but maybe revive?
ICALENDARS = os.getenv('ICALENDARS', 'NULL,VOID').split(',')
from typing import Optional

class IMAP_DETAILS(BaseModel):
    """Connection settings for the monitored IMAP/SMTP email account."""
    email: str
    password: str
    host: str
    imap_port: int
    smtp_port: int
    # Encryption mode names; None means no encryption configured.
    # FIX: annotated Optional[str] (was the ill-typed `str = None`) so a
    # None default is actually valid under pydantic's type checking.
    imap_encryption: Optional[str] = None
    smtp_encryption: Optional[str] = None

# Singleton account settings, pulled from the environment at import time.
IMAP = IMAP_DETAILS(
    email = os.getenv('IMAP_EMAIL'),
    password = os.getenv('IMAP_PASSWORD'),
    host = os.getenv('IMAP_HOST', '127.0.0.1'),
    imap_port = int(os.getenv('IMAP_PORT', 1143)),
    smtp_port = int(os.getenv('SMTP_PORT', 469)),
    imap_encryption = os.getenv('IMAP_ENCRYPTION', None),
    smtp_encryption = os.getenv('SMTP_ENCRYPTION', None)
)
# Addresses/patterns we will or will not auto-respond to (comma-separated env lists).
AUTORESPONSE_WHITELIST = os.getenv('AUTORESPONSE_WHITELIST', '').split(',')
AUTORESPONSE_BLACKLIST = os.getenv('AUTORESPONSE_BLACKLIST', '').split(',')
# Hard-coded patterns that must never receive an autoresponse.
AUTORESPONSE_BLACKLIST.extend(["no-reply@", "noreply@", "@uscourts.gov", "@doi.gov"])
AUTORESPONSE_CONTEXT = os.getenv('AUTORESPONSE_CONTEXT', None)
# Autorespond only when a context prompt is configured.
# FIX: identity comparison for None (`is not None`) per PEP 8, not `!= None`.
AUTORESPOND = AUTORESPONSE_CONTEXT is not None
### Courtlistener & other webhooks
COURTLISTENER_DOCKETS_DIR = DATA_DIR / "courtlistener" / "dockets"
os.makedirs(COURTLISTENER_DOCKETS_DIR, exist_ok=True)
COURTLISTENER_SEARCH_DIR = DATA_DIR / "courtlistener" / "cases"
os.makedirs(COURTLISTENER_SEARCH_DIR, exist_ok=True)
CASETABLE_PATH = DATA_DIR / "courtlistener" / "cases.json"
COURTLISTENER_API_KEY = os.getenv("COURTLISTENER_API_KEY")
COURTLISTENER_BASE_URL = os.getenv("COURTLISTENER_BASE_URL", "https://www.courtlistener.com")
# NOTE(review): pinned to the production v3 endpoint and does not follow a
# COURTLISTENER_BASE_URL override — confirm that is intentional.
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
### Keys & passwords
# NOTE(review): the default filename looks like a typo for
# "your_public_key.asc"; left unchanged since deployments may rely on it.
PUBLIC_KEY_FILE = os.getenv("PUBLIC_KEY_FILE", 'you_public_key.asc')
# Read eagerly at import — raises FileNotFoundError if the key file is absent.
PUBLIC_KEY = (BASE_DIR.parent / PUBLIC_KEY_FILE).read_text()
MAC_ID = os.getenv("MAC_ID")
MAC_UN = os.getenv("MAC_UN")
MAC_PW = os.getenv("MAC_PW")
TIMING_API_KEY = os.getenv("TIMING_API_KEY")
TIMING_API_URL = os.getenv("TIMING_API_URL", "https://web.timingapp.com/api/v1")
PHOTOPRISM_URL = os.getenv("PHOTOPRISM_URL")
PHOTOPRISM_USER = os.getenv("PHOTOPRISM_USER")
PHOTOPRISM_PASS = os.getenv("PHOTOPRISM_PASS")
### Tailscale
# NOTE(review): "NULL" is not a valid IP, so ip_address() raises ValueError
# at import time whenever TS_IP is unset — confirm a guard is wanted.
TS_IP = ipaddress.ip_address(os.getenv("TS_IP", "NULL"))
TS_SUBNET = ipaddress.ip_network(os.getenv("TS_SUBNET")) if os.getenv("TS_SUBNET") else None
TS_ID = os.getenv("TS_ID", "NULL")
TS_TAILNET = os.getenv("TS_TAILNET", "NULL")
TS_ADDRESS = f"http://{TS_ID}.{TS_TAILNET}.ts.net"
### Cloudflare
CF_API_BASE_URL = os.getenv("CF_API_BASE_URL")
CF_TOKEN = os.getenv("CF_TOKEN")
CF_IP = DATA_DIR / "cf_ip.txt" # to be deprecated soon
CF_DOMAINS_PATH = DATA_DIR / "cf_domains.json" # to be deprecated soon
### Caddy - not fully implemented
# NOTE(review): this rebinding drops the f"http://{HOST}" fallback assigned
# earlier in this module — BASE_URL becomes None when the env var is unset.
BASE_URL = os.getenv("BASE_URL")
CADDY_SERVER = os.getenv('CADDY_SERVER', None)
CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None
CADDY_API_KEY = os.getenv("CADDY_API_KEY")
### Microsoft Graph
MS365_CLIENT_ID = os.getenv('MS365_CLIENT_ID')
MS365_SECRET = os.getenv('MS365_SECRET')
MS365_TENANT_ID = os.getenv('MS365_TENANT_ID')
MS365_CERT_PATH = CONFIG_DIR / 'MS365' / '.cert.pem' # deprecated
MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated
# Read eagerly at import — raises FileNotFoundError when the key is absent.
MS365_KEY = MS365_KEY_PATH.read_text()
MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt'
MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT')
MS365_LOGIN_URL = os.getenv("MS365_LOGIN_URL", "https://login.microsoftonline.com")
MS365_AUTHORITY_URL = f"{MS365_LOGIN_URL}/{MS365_TENANT_ID}"
MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect")
MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access').split(',')
### Maintenance
GARBAGE_COLLECTION_INTERVAL = 60 * 60 # Run cleanup every hour
GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours

146
sijapi/__main__.py Executable file
View file

@ -0,0 +1,146 @@
#!/Users/sij/miniforge3/envs/api/bin/python
from fastapi import FastAPI, Request, HTTPException, Response
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import ClientDisconnect
from hypercorn.asyncio import serve
from hypercorn.config import Config
import sys
import asyncio
import httpx
import argparse
import json
import ipaddress
import importlib
from dotenv import load_dotenv
from pathlib import Path
from datetime import datetime
import argparse
from . import LOGGER, LOGS_DIR
from .logs import Logger
# CLI flags are parsed at import time so module-level logger setup can use them.
parser = argparse.ArgumentParser(description='Personal API.')
parser.add_argument('--debug', action='store_true', help='Set log level to INFO')
parser.add_argument('--test', type=str, help='Load only the specified module.')
args = parser.parse_args()
# Using the package logger
main_logger = Logger("main", LOGS_DIR)
main_logger.setup_from_args(args)
logger = LOGGER
# Use the logger
# Startup smoke-test log lines.
logger.debug("Debug Log")
logger.info("Info Log")
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOST, ENV_PATH, GLOBAL_API_KEY, REQUESTS_DIR, ROUTER_DIR, REQUESTS_LOG_PATH, PUBLIC_SERVICES, TRUSTED_SUBNETS, ROUTERS
# Initialize a FastAPI application
api = FastAPI()
# CORSMiddleware
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers under the CORS spec — confirm credentials are needed.
api.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)
class SimpleAPIKeyMiddleware(BaseHTTPMiddleware):
    """Require GLOBAL_API_KEY (Authorization header or ?api_key=) on
    non-public routes.

    Requests from TRUSTED_SUBNETS and CORS preflight (OPTIONS) requests
    are exempt from the key check.
    """
    async def dispatch(self, request: Request, call_next):
        client_ip = ipaddress.ip_address(request.client.host)
        if request.method == "OPTIONS":
            # Allow CORS preflight requests
            return JSONResponse(status_code=200)
        if request.url.path not in PUBLIC_SERVICES:
            if not any(client_ip in subnet for subnet in TRUSTED_SUBNETS):
                api_key_header = request.headers.get("Authorization")
                api_key_query = request.query_params.get("api_key")
                if api_key_header:
                    # Strip an optional "Bearer " scheme prefix, matching the
                    # scheme case-insensitively. BUG FIX: the original
                    # lowercased the entire header value, which corrupted
                    # mixed-case API keys so they could never match.
                    scheme, _, credential = api_key_header.partition(" ")
                    if scheme.lower() == "bearer" and credential:
                        api_key_header = credential.strip()
                if api_key_header != GLOBAL_API_KEY and api_key_query != GLOBAL_API_KEY:
                    ERR(f"Invalid API key provided by a requester.")
                    return JSONResponse(
                        status_code=401,
                        content={"detail": "Invalid or missing API key"}
                    )
        response = await call_next(request)
        # DEBUG(f"Request from {client_ip} is complete")
        return response
api.add_middleware(SimpleAPIKeyMiddleware)
canceled_middleware = """
@api.middleware("http")
async def log_requests(request: Request, call_next):
DEBUG(f"Incoming request: {request.method} {request.url}")
DEBUG(f"Request headers: {request.headers}")
DEBUG(f"Request body: {await request.body()}")
response = await call_next(request)
return response
async def log_outgoing_request(request):
INFO(f"Outgoing request: {request.method} {request.url}")
DEBUG(f"Request headers: {request.headers}")
DEBUG(f"Request body: {request.content}")
"""
@api.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Log every raised HTTPException, then relay it as a JSON error body."""
    ERR(f"HTTP Exception: {exc.status_code} - {exc.detail}")
    ERR(f"Request: {request.method} {request.url}")
    error_body = {"detail": exc.detail}
    return JSONResponse(status_code=exc.status_code, content=error_body)
@api.middleware("http")
async def handle_exception_middleware(request: Request, call_next):
    """Catch the Content-Length mismatch RuntimeError and return a clean 500.

    BUG FIX: the original tried to patch `response.headers` inside the
    except branch, but `response` is unbound there (call_next raised before
    assignment), which produced an UnboundLocalError instead of a response.
    """
    try:
        response = await call_next(request)
    except RuntimeError as exc:
        if str(exc) == "Response content longer than Content-Length":
            ERR(f"Content-Length mismatch on {request.method} {request.url}: {exc}")
            return JSONResponse(
                status_code=500,
                content={"detail": "Internal server error"}
            )
        # Anything else is unexpected — propagate unchanged.
        raise
    return response
def load_router(router_name):
    """Import sijapi.routers.<router_name> and attach its router to the app.

    The module is expected to expose an attribute named after itself; a
    missing file is logged as an error, a failed import/lookup as critical.
    """
    router_file = ROUTER_DIR / f'{router_name}.py'
    DEBUG(f"Attempting to load {router_name.capitalize()}...")
    if not router_file.exists():
        # Guard clause: nothing to import if the source file is absent.
        ERR(f"Router file for {router_name} does not exist.")
        return
    module_path = f'sijapi.routers.{router_name}'
    try:
        router_module = importlib.import_module(module_path)
        api.include_router(getattr(router_module, router_name))
        INFO(f"{router_name.capitalize()} router loaded.")
    except (ImportError, AttributeError) as e:
        CRITICAL(f"Failed to load router {router_name}: {e}")
def main(argv):
    """Entry point: load all routers (or only --test's), then serve.

    `argv` is accepted for console_script compatibility, but flag parsing
    happens at import time via the module-level argparse `args`.
    """
    if args.test:
        load_router(args.test)
    else:
        CRITICAL(f"sijapi launched")
        # BUG FIX: the original logged `args._get_args`, a bound-method repr;
        # log the parsed namespace itself instead.
        CRITICAL(f"{args}")
        for router_name in ROUTERS:
            load_router(router_name)
    config = Config()
    config.keep_alive_timeout = 1200  # long keep-alive for slow streaming endpoints
    config.bind = [HOST]
    asyncio.run(serve(api, config))

if __name__ == "__main__":
    main(sys.argv[1:])

496
sijapi/config/.env-example Normal file
View file

@ -0,0 +1,496 @@
#──────────────────────────────────────────────────────────────────────────────────
# C O N F I G U R A T I O N F I L E
#──────────────────────────────────────────────────────────────────────────────────
#
# Hi friend! You've found my hidden .config.YAML-example file. Do you like Zalgo
# text and old-school ASCII art? I bet you do. So listen, this'll be your method
# for configuring sijapi, and nothing works until you at least:
#
# (1) fill in the ESSENTIALS category, and
#
# (2) rename this file `.config.yaml`
#
# ... and even then, certain features will not work until you set other
# relevant variables below.
#
# So get yourself a beverage, put on some sick beats, and settle in for a vibe-y
# configuration sesh. Remember to read my detailed notes if you ever feel lost,
# and most important, remember:
#
# † you are NOT alone,
# † I love you SO much,
# † and you are S̸̢̟̑̒̊ͅō̸͔͕͎̟͜ worthy.
#
# y o u r b f & b f 4 e ,
# .x+=:. . .
# z` ^% @88> .. †††>
# . <k %8P 888> .d`` %†P
# .@8Ned8" . "8P u @8Ne. .u .
# .@^%8888" .@88u . us888u. %8888:u@88N .@88u
# x88: `)8b. ''888E` u888u. .@88 "8888" `888I 888. ''888E`
# ~ 8888N=*8888 888E `'888E 9888 9888 888I 888I 888E
# %8" R88 888E 888E 9888 9888 888I 888I 888E
# @8Wou 9% 888E 888E 9888 9888 uW888L 888' 888E
# .888888P` 888& 888E 9888 9888 '*88888Nu88P 888&
# ` ^"F R888" 888E "888*""888" ~ '88888F` R888"
# "" 888E ^Y" ^Y' 888 ^ ""
# 888E *8E
# 888P '8>
# .J88" " "
#
#
# B U T I H E A R Y O U :
# L E T ' S T A K E I T S L O W A N D
# ───────────── S̢͉̺ T̪͔͓ A͇̞ R̘͕͙ T̢̡͉ W͚̻ I͉͇͜ T̟͖̺ H̡͚͙ T̺̞̠ H̢̢̙ E̢̪͓ ──────────────
#
# ███████╗███████╗███████╗███████╗███╗ ██╗████████╗██╗ █████╗ ██╗ ███████╗
# ██╔════╝██╔════╝██╔════╝██╔════╝████╗ ██║╚══██╔══╝██║██╔══██╗██║ ██╔════╝
# █████╗ ███████╗███████╗█████╗ ██╔██╗ ██║ ██║ ██║███████║██║ ███████╗
# ██╔══╝ ╚════██║╚════██║██╔══╝ ██║╚██╗██║ ██║ ██║██╔══██║██║ ╚════██║
# ███████╗███████║███████║███████╗██║ ╚████║ ██║ ██║██║ ██║███████╗███████║
# ╚══════╝╚══════╝╚══════╝╚══════╝╚═╝ ╚═══╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝
# ─────────────────────────────────────────────────────────────────
#
#─── first, bind an ip address and port : ──────────────────────────────────────────
HOST_NET=0.0.0.0
HOST_PORT=4444
BASE_URL=http://localhost:4444 # <--- replace with base URL of reverse proxy, etc
#─── notes: ──────────────────────────────────────────────────────────────────────
#
# HOST_NET† and HOST_PORT comprise HOST and determine the ip and port the server binds to.
# BASE_URL is used to assemble URLs, e.g. in the MS authentication flow and for serving images generated on the sd router.
# BASE_URL should match the base URL used to access sijapi sans endpoint, e.g. http://localhost:4444 or https://api.sij.ai
#
# † Take care here! Please ensure you understand the implications of setting HOST_NET to anything besides 127.0.0.1, and configure your firewall and router appropriately if you do. Setting HOST_NET to 0.0.0.0, for instance, opens sijapi to any device the server running it is accessible to — including potentially frightening internet randos (depending how your firewall, router, and NAT are configured).
#
# Here are a few options to consider to more securely enable access from
# other devices:
#
# (1) if all access can occur over Tailscale, either:
# (a) leave HOST_NET set to 127.0.0.1, run `tailscale cert $(tailscale
# whois $(tailscale ip | head -n 1) | awk '/Name:/ {print $2}')
# if you haven't already issued yourself a TLS certificate on
# Tailscale, and then run `tailscale serve --bg --https=4443
# 4444` to expose sijapi to your other tailscale-enabled devices
# at `https://{device.magicdns-domain.net:4443`}; or
# (b) set HOST_NET to your server's Tailscale IP (this should work
# but for me doesn't reliably)
#
# (2) if WAN access truly is required, leave HOST_NET set to 127.0.0.1 and
# configure either:
# (a) a Cloudflare tunnel, or
# (b) a reverse proxy with HTTPS (Caddy is excellent for this).
#
# And please be sure to set a strong API key either way but especially for (2).
# ──────────
#
#──── configure API key authorization and select exemptions──────────────────begin
GLOBAL_API_KEY=¿SECRET? # <--- specify a key to unlock the API
PUBLIC_SERVICES=/id,/ip,/health,/img/,/cl/dockets,/cl/search,/cd/alert
TRUSTED_SUBNETS=127.0.0.1/32,10.13.37.0/24,100.64.64.0/24
#─── notes: ───────────────────────────────────────────────────────────────────end
#
# GLOBAL_API_KEY determines the API key that will be required to access all endpoints, except access to PUBLIC_SERVICES or from TRUSTED_SUBNETS. Authentication is made via an `Authorization: Bearer {GLOBAL_API_KEY}` header.
# TRUSTED_SUBNETS might commonly include 127.0.0.1/32 (localhost), 100.x.x.0/24 (Tailscale tailnet), and/or 192.168.x.0/24 or 10.x.x.0/24 (local network).
# When configuring a reverse proxy or Cloudflare tunnel, please verify traffic through it does not appear to sijapi (i.e. in ./logs) as though it were coming from any of the subnets specified here. For sij, using Caddy, it does not, but your setup may differ.
# ──────────
#
#─── router selection: ────────────────────────────────────────────────────────────
ROUTERS=asr,calendar,cf,email,health,hooks,llm,locate,note,rag,sd,serve,summarize,time,tts,weather
UNLOADED=ig
#─── notes: ──────────────────────────────────────────────────────────────────────
#
# ROUTERS determines which routers are loaded.†
#
# UNLOADED is not used directly -- it's just there to help keep track which routers are disabled.
#
# † ┓ ┏ orth bearing in mind: some routers inherently rely on other routers,
# ┃┃┃ 3rd party APIs, or other apps being installed locally. If a router is
# ┗┻┛ set to load (i.e. is included in ROUTERS) and it depends on another router,
# that other router will also load too irrespective of whether it's listed.
#
# B U T L E T ' S G E T D O W N T O
# , S H A L L W E ?
#
# asr: requires faster_whisper — $ pip install faster_whisper — and
# downloading the model file specified in ASR_DEFAULT_MODEL.
#
# calendar: requires (1) a Microsoft 365 account with a properly configured
# Azure Active Directory app, and/or (2) Calendars on macOS.
#
# cf: interfaces with the Cloudflare API and Caddy to register new
# [sub-]domains on Cloudflare and deploy them with Caddy as
# reverse proxy.
#
# llm: requires ollama — $ pip install ollama — and downloading the
# models set in LLM_DEFAULT_MODEL and LLM_VISION_MODEL.
#
# email: email auto-responders and summarizers to be found here. Designed
# for use with IMAP.
#
# hooks: designed for two specific use cases: monitoring court dockets
# through CourtListener.org, and monitoring arbitrary web pages for
# changes in tandem with a self-hosted changedetection.io instance.
# Both require accounts; other functionality would require
# additional / modified code.
#
# ig: requires an Instagram account, with credentials and other settings
# configured separately in the ig_config.json file; relies heavily
# on the llm and sd routers which have their own dependencies.
#
# locate: some endpoints work as is, but the core location tracking
# functionality requires Postgresql + PostGIS extension and are
# designed specifically to pair with a mobile device where
# Pythonista is installed and configured to run the
# `gps_tracker.py` and `gps_upload.py` scripts periodically or per
#             repeating condition (e.g. via automation under Apple Shortcuts).
#
# note: designed for use with Obsidian plus the Daily Notes and Tasks
# core extensions; and the Admonitions, Banners, Icons (with the
# Lucide pack), and Make.md community extensions. Moreover `notes`
# relies heavily on the calendar, llm, locate, sd, summarize, time,
# tts, and weather routers and accordingly on the external
# dependencies of each.
#
# sd: requires ComfyUI plus any modules and StableDiffusion models
# set in sd_config and individual workflow .json files.
#
# summarize: relies on the llm router and thus requires ollama.
#
# time: requires the subscription-based macOS app 'Timing' (one of many
# apps that together make SetApp an incredible value for macOS users!)
#
# tts: designed for use with coqui — $ pip install coqui — and/or the
# ElevenLabs API.
#
# weather: requires a VisualCrossing API key and is designed for (but doesn't
# itself strictly require) Postgresql with the PostGIS extension;
# (... but it presently relies on the locate router, which does).
#
#
# ... Whew! that was a lot, right? I'm so glad we're in this together...
# ──────────
#
#───────── W H A T A R E Y O U R D I G I T S , H O N E Y B U N ? ────────
# LOCALIZATION
#─── what are your digits, honey-bun?: ──────────────────────────────────────────────
TZ=America/Los_Angeles
HOME_ZIP=97401
#─── notes: ─────────────────────────────────────────────────────────────────────────
#
# ──────────
#
#─────────────────────── Y ₒ ᵤ ' ᵣ ₑ G ₒ ₙ ₙ ₐ ₗ ₒ ᵥ ₑ ────────────────────────
#
# ░ ░░ ░░ ░ ░░░░░░░░ ░░░ ░░░ ░░ ░░░░░░░ ░
# ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒
# ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓▓▓▓▓ ▓▓ ▓▓▓▓▓▓▓ ▓▓▓▓ ▓ ▓▓▓▓▓▓▓ ▓▓▓
# ████ ████ ████ ████ █████████████ █ ████ █ █ ███████ ███████
# ████ ████ ████ █ █ ██ ███ ██ ████ █ █ █
#
# A N D I ' M N O T. E V E N. J E A L O U S.
# Y O U D E S E R V E I T A L L , B A B Y C A K E S.
#
#─── use tailscale for secure remote access: ───────────────────────────────────────
TS_IP=100.13.37.5 # <--- enter your own TS IP address
TS_SUBNET=100.13.37.0/24 # <--- enter your own TS subnet (IPv4/CIDR)
TS_ID=¿SECRET? # <--- enter your own TS device name
TS_TAILNET=screaming_sailfin # <--- enter your own TS tailnet / MagicDNS name
TAILSCALE_API_KEY=¿SECRET? # <--- enter your own TS API key
#─── notes: ────────────────────────────────────────────────────────────────────────
#
# TS_IP should match the Tailscale IP of the device. But this is deprecated, and if the functionality becomes relevant again, it should be come back in the form of a dynamic check (`tailscale status` in a shell subprocess) in __init__.py or even the /id endpoint.
# TS_SUBNET should match the IP/CIDR-format tailnet
# TS_ID currently has two roles: it's used to assemble the complete MagicDNS of the server, and it determines what the /id endpoint on the health router returns. This is relevant where multiple servers run the script behind a load balancer (e.g. Caddy), as a means to check which server responds. Bear in mind that /id is NOT API key-protected by default here.
# TS_TAILNET should match the tailnet's MagicDNS domain (omitting the `.net`, for reasons)
# ──────────
#
#──────────── ᵁ & ᴹ ᴱ , W E C A N G E T T H R O U G H ────────────────────
#
# ██▓███ ▒█████ ██████ ▄▄▄█████▓ ▄████ ██▀███ ▓█████ ██████
# ▓██░ ██▒██▒ ██▒▒██ ▒ ▓ ██▒ ▓▒ ██▒ ▀█▒▓██ ▒ ██▒▓█ ▀ ▒██ ▒
# ▓██░ ██▓▒██░ ██▒░ ▓██▄ ▒ ▓██░ ▒░▒██░▄▄▄░▓██ ░▄█ ▒▒███ ░ ▓██▄
# ▒██▄█▓▒ ▒██ ██░ ▒ ██▒░ ▓██▓ ░ ░▓█ ██▓▒██▀▀█▄ ▒▓█ ▄ ▒ ██▒
# ▒██▒ ░ ░ ████▓▒░▒██████▒▒ ▒██▒ ░ ░▒▓███▀▒░██▓ ▒██▒░▒████▒▒██████▒▒
# ▒██▒ ░ ░ ▒░▒░▒░ ▒ ▒▓▒ ▒ ░ ▒ ░░ ░▒ ▒ ░ ▒▓ ░▒▓░░░ ▒░ ░▒ ▒▓▒ ▒ ░
# ▒▓▒░ ░ ▒ ▒░ ░ ░▒ ░ ░ ░ ░ ░ ░▒ ░ ▒░ ░ ░ ░░ ░▒ ░ ░
# ░▒ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░░ ░ ░ ░ ░ ░
# ░░ ░ ░T̷ O̷ G̷ E̷ T̷ H̷ ░ R̷. ░ ░ ░ ░ ░
#
#─── frag, or weat,and locate modules:── .
DB=db
#
DB_HOST=127.0.0.1
DB_PORT=5432
# R E A L T I G H T.
DB_USER=postgres
DB_PASS=¿SECRET? # <--- enter your own Postgres password
# Y E A H . . .
DB_SSH=100.64.64.15
# . . . 𝙹 𝚄 𝚂 𝚃 𝙻 𝙸 𝙺 𝙴 𝚃 𝙷 𝙰 𝚃.
DB_SSH_USER=sij
DB_SSH_PASS=¿SECRET? # <--- enter SSH password for pg server (if not localhost)
#─── notes: ────────────────────────────────────────────────── S E E ? 𝕰 𝖅 - 𝕻 𝖅
#
# DB, DB_HOST, DB_PORT, DB_USER, and DB_PASS should specify those respective
# credentials for your Postgres database. DB_SSH and associated _USER and _PASS
# variables allow database access over an SSH tunnel.
#
# In the current implementation, we rely on Postgres to hold:
# i. user-logged location data (locate module), and
# ii. results from past weather forecast checks (weather module).
#
# A future version will hopefully make use of PostGIS's geocoding capabilities,
# and add a vector database for the LLM module. Until then it's up to you if the
# locate and weather modules are worth the hassle of maintaining Postgres.
# ──────────
#
#─────────────────────────────── 𝐼 𝐵 𝐸 𝑇 𝑌 𝑂 𝑈 ─────────────────────────────────
# 𝑅 𝐸 𝐶 𝐸 𝐼 𝑉 𝐸 𝐴 𝐿 𝑂 𝑇 𝑂 𝐹 𝐿 𝑂 𝑉 𝐸 𝐿 𝐸 𝑇 𝑇 𝐸 𝑅 𝑆 𝑂 𝑉 𝐸 𝑅
#
# .----------------. .----------------. .----------------. .----------------.
# | .--------------. | .--------------. | .--------------. | .--------------. |
# | | _____ | | | ____ ____ | | | __ | | | ______ | |
# | | |_ _| | | ||_ \ / _|| | | / \ | | | |_ __ \ | |
# | | | | | | | | \/ | | | | / /\ \ | | | | |__) | | |
# | | | | | | | | |\ /| | | | | / ____ \ | | | | ___/ | |
# | | _| |_ | | | _| |_\/_| |_ | | | _/ / \ \_ | | | _| |_ | |
# | | |_____| | | ||_____||_____|| | ||____| |____|| | | |_____| | |
# | | | | | | | | | | | | |
# | '--------------' | '--------------' | '--------------' | '--------------' |
# '----------------' '----------------' '----------------' '----------------'
#
# 𝙴 𝙼 𝙰 𝙸 𝙻
#
#─── imap & smtp: ────────────────────────────────────────────────────────────────────────
IMAP_HOST=127.0.0.1
IMAP_EMAIL=¿SECRET? # <--- enter yours
IMAP_PASSWORD=¿SECRET? # <--- enter yours
IMAP_PORT=1142
IMAP_ENCRYPTION=STARTTLS
SMTP_PORT=1024
SMTP_ENCRYPTION=SSL
AUTORESPONSE_WHITELIST=¿SECRET? # <--- enter complete/fragmented emails, or keywords
AUTORESPONSE_BLACKLIST=¿SECRET? # <--- same deal-io
AUTORESPONSE_CONTEXT=¿SECRET? # <--- inform the LLM why it's auto-responding for you
USER_FULLNAME=¿SECRET? # <--- more context for the LLM
USER_BIO=¿SECRET? # <--- yet more context for the nosy LLM
#─── notes: ───────────────────────────────────────────────────────────────────────────────
#
# This is primarily for summarizing incoming emails. Any IMAP account should work, but
# I focused testing on a somewhat complex setup involving Protonmail Bridge.
#
# ──────────
#
#
#─── ms365 (calendars): ──────────────────────────────────────────────────────────────
ICAL_TOGGLE=True
ICALENDARS='E68FE085-2ECA-4097-AF0A-8D38C404D8DA,AB5A0473-16DD-4916-BD6D-F12AC2455285'
MS365_TOGGLE=False
MS365_CLIENT_ID=¿SECRET? # <--- enter your client ID (found in Azure pane)
MS365_TENANT_ID=¿SECRET? # <--- enter your tenant ID (found in Azure pane)
MS365_SECRET=¿SECRET? # <--- enter your app secret (found in Azure pane)
MS365_SCOPE='basic,calendar_all,Calendars.Read,Calendars.ReadWrite,offline_access'
MS365_TOKEN_FILE=oauth_token.txt
MS365_LOGIN_URL='https://login.microsoftonline.com'
MS365_REDIRECT_PATH=¿SECRET? # <--- e.g. http://localhost:4444/o365/oauth_redirect
#─── notes: ───────────────────────────────────────────────────────────────────────────────
#
# # MS365_CLIENT_ID, _TENANT_ID, _SECRET, AND _SCOPES must be obtained from Microsoft
# via the Azure portal, by creating a new app registration and an accompanying secret.
# MS365_THUMBPRINT is vestige of an earlier failed attempt to get this working, and
# for now is deprecated. I recommend seeking out a well-reviewed tutorial for
# creating an app on Azure with a client_id and secret and necessary scopes for
# individual calendar access, because I had one heck of a time trying various approaches.
# Do better, Microsoft.
#
# ──────────
#
#
#──────────────────── L E T ' S G E T S I L L Y , ─────────────────────────────
# T H E N G O B͎̝̪̼͉͜ O͖͕͇͚͉̼ N̢̦͖̺͔͎ K̠͓̠͖͜ E̝̼̫̙͔̞ R̡͇͖̙͉͎ S̡͉̠͎͙̪
# W I T H O U R O W N
#
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓██████▒▓██████▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
# ░▒▓████████▓▒ ░▒▓████████▓▒ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
#
#
# ( F O R R E A L T H O U G H , T H E S E A R E
#
#─── via comfyui (stable diffusion): ─────── S̝͖̦͓̪̻ O̡͖̘̫͇̟ H̢͔͔̫͉͜ O̢̢͉̞͍̘ T̟͍͍̪̦̞ R I G H T N O W
LLM_URL=http://localhost:11434
SYSTEM_MSG=You are a helpful AI assistant.
DEFAULT_LLM=dolphin-mistral
DEFAULT_VISION=llava-llama3
OPENAI_API_KEY=¿SECRET? # <--- not presently implemented for anything
SUMMARY_MODEL=dolphin-mistral
SUMMARY_CHUNK_SIZE=4000
SUMMARY_CHUNK_OVERLAP=100
SUMMARY_TPW=1.3
SUMMARY_LENGTH_RATIO=4
SUMMARY_MIN_LENGTH=150
SUMMARY_TOKEN_LIMIT=4096
SUMMARY_INSTRUCT='You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.'
SUMMARY_INSTRUCT_TTS='You are an AI assistant that summarizes emails -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary. Your response will undergo Text-To-Speech conversion and added to Sanjays private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following.'
DEFAULT_VOICE=joanne
WHISPER_CPP_DIR='whisper.cpp'
WHISPER_CPP_MODELS=tiny,base,base-en,small,medium,medium-en,large-v3
WEBCLIPPER_TTS=elevenlabs
EMAIL_SUMMARY_TTS=local
YEAR_FMT="%Y"
MONTH_FMT="%Y-%m %B"
DAY_FMT="%Y-%m-%d %A"
DAY_SHORT_FMT="%Y-%m-%d"
#─── notes: ──────────────────────────────────────────────────────────────────────────────
#
# The exact values here will depend on what software you are using to inference an LLM,
# and of course what models and capabilities are available through it. The script was
# designed for use with `ollama`, but most of the functionality should be equal with
# LM Studio, LocalAI, etc...
#
# DEFAULT_LLM is self-explanatory; DEFAULT_VISION is used for image recognition within
# a multimodal chat context, such as on the ig module for generating intelligible
# comments to Instagram posts, or more realistic captions for sd-generated images.
#
# Note it's possible to specify a separate model for general purposes and for
# summarization tasks. The other SUMMARY_ variables call for some explanation,
# in particular six that are most relevant when summarizing very long documents:
#
# SUMMARY_CHUNK_SIZE: determines the maximum length, in tokens, the pieces that are
# split and sent individually to the model.
#
# SUMMARY_CHUNK_OVERLAP: determines how much of each chunk is overlapped with the prior
# and next chunks. Set too high causes repetition, set too low
# causes misunderstood confusion and poor summary results.
# The summarization algorithm is flawed but I've gotten the best
#                         results with this set around 100-200.
#
# SUMMARY_TPW: used in estimating the token count of a prompt for purposes of
# complying with the maximum tokens a model can handle at once.
# Best you can do is estimate. I tend to use long words a fair
# excessively and found my average was 1.3 tokens per word. YMMV.
#
# SUMMARY_LENGTH_RATIO: this is the primary control over the length of generated
# summaries, expressed as the ratio of original text length to
# summary length. The default, 4, means the summaries will be
# around 1/4 the length of the original text you provide it.
#
# SUMMARY_MIN_LENGTH: the default SUMMARY_LENGTH_RATIO of 4 isn't ideal for very
# short texts, but setting it any lower sacrifices conciseness
# in summaries of longer texts. In short one size doesn't fit
# all. The compromise I landed on was to set a "maximum minimum"
# summary length: under no circumstances will the script impose
# a smaller maximum length than this value.
#
# SUMMARY_INSTRUCT: sets the prompt used when summarizing text.
#
# SUMMARY_INSTRUCT_TTS: sets a separate prompt for use when summarizing text where
# tts output was requested; tends to yield "cleaner" audio
# with less numbers (page numbers, citations) and other
# information extraneous to spoken contexts.
#
# DEFAULT_VOICE: used for all tts tasks when a specific voice is not requested.
#
# ──────────
#
#
#────,-_/────────── W E C A N E X P E R I M E N T W I T H ──────────.───────────
# ' | ,~-,-. ,-. ,-. ,--. | --' ,--. ,-. ,--. ,-. ,-. |-- . ,-. ,-.
# .^ | | | | ,--| | | | --' | -,- | --' | | | --' | ,--| | | | | | |
# `--' ' ' ' `-^ `-| `--' `---| `--' ' ' `--' ' `--^ `' ` `-' ' '
# , | ,-. | ~ 𝙸 𝙽 𝚃 𝙷 𝙴 𝙽 𝚄 𝙳 𝙴 . ~
# `~~' `-+'
# O R F U L L Y C L O T H E D ── U P T O Y O U
#
#─── via comfyui (stable diffusion): ───── ( B U T L E T M E K N O W , Y E A H ? )
COMFYUI_URL=http://localhost:8188
COMFYUI_DIR=/Users/sij/workshop/ComfyUI
COMFYUI_LAUNCH_CMD="mamba activate comfyui && python main.py"
OBSIDIAN_BANNER_SCENE=wallpaper
PHOTOPRISM_USER=NOT_IMPLEMENTED
PHOTOPRISM_PASS=NOT_IMPLEMENTED
ANONYMIZED_TELEMETRY=False
#─── notes: ──────────────────────────────────────────────────────────────────────────────
#
# COMFY_URL, as you may expect, should point to the URL you use to access ComfyUI. If you
# don't know, watch for it in the server logs once ComfyUI is fully launched.
#
# COMFYUI_DIR, with similar self-evidence, should point to the base directory of your
# ComfyUI installation (i.e. the folder that contains `models`, `inputs`, and `outputs`).
# It can handle either a
#
# PhotoPrism integration is not yet implemented, so don't bother with that just yet.
# ──────────
#
# D O N ' T M I S S O N E ───────────────────────────────────────
#\ F I N A L S M A T T E R I N G O F Ⓜ Ⓘ Ⓢ Ⓒ Ⓔ Ⓛ Ⓛ Ⓐ Ⓝ Ⓨ \
# \ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ _/\\\\_ _ _ _ _ _ /\\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ \/\\\\\\_ _ _ _ /\\\\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ \/\\\//\\\_ _ /\\\//\\\ _ _/\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# \ _ _ \/\\\\///\\\/\\\/ \/\\\ _ _///_ _ _/\\\\\\\\\\_ _ _ _/\\\\\\\\_ _\
# \ _ _ \/\\\ _\///\\\/ _ \/\\\ _ _/\\\ _ \/\\\////// _ _ _/\\\////// _ _\
# \ _ _ \/\\\ _ _\/// _ _ \/\\\ _ _/\\\ _ \/\\\\\\\\\\_ _ /\\\_ _ _ _ _ _\
# \ _ _ \/\\\ _ _ _ _ _ _ \/\\\ _ _/\\\ _ \////////\\\_ _\//\\\ _ _ _ _ _\
# \ _ _ \/\\\ _ _ _ _ _ _ \/\\\ _ _/\\\ _ _/\\\\\\\\\\_ _ \///\\\\\\\\_ _\
# \ _ _ \///_ _ _ _ _ _ _ \///_ _ _///_ _ \////////// _ _ _ \//////// _ _\
# \ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
# ─────────────────── A N D O T H E R W H A T - H A V E - Y O U S ──
#
#─── other needful API keys, mainly: ────────────────────────────────────────────────────
CF_API_BASE_URL=¿SECRET? # <--- Cloudflare API URL
CF_TOKEN=¿SECRET? # <--- Cloudflare Token
VISUALCROSSING_BASE_URL='https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline'
VISUALCROSSING_API_KEY=¿SECRET? # <--- VisualCrossing API key (for Weather)
ELEVENLABS_API_KEY=¿SECRET? # <--- ElevenLabs API key (for TTS)
COURTLISTENER_BASE_URL='https://www.courtlistener.com'
COURTLISTENER_API_KEY=¿SECRET? # <--- CourtListener API key (for court docket entries)
TIMING_API_URL='https://web.timingapp.com/api/v1'
TIMING_API_KEY=¿SECRET? # <--- API key for macOS/web app Timing (time tracking)
PUBLIC_KEY_FILE=sij.asc # <--- public PGP key (served at /pgp)
MAC_ID=¿SECRET? # <--- Tailscale hostname for primary macOS (alerts)
MAC_UN=¿SECRET? # <--- Primary macOS username
MAC_PW=¿SECRET? # <--- Primary macOS password
#─── notes: ──────────────────────────────────────────────────────────────────────────────
#
#
# CF_TOKEN: a Cloudflare token. This is used on the cf router for quick
# deployment of new domains in tandem with Caddy and for ddns.
#
# VISUALCROSSING_API_KEY: used for obtaining weather forecasts. It is a very data-rich
# yet affordable source of weather info, with a generous free
# plan.
#
# ELEVENLABS_API_KEY: used when on the tts router if tts tasks are outsourced to
# the state-of-the-art models at ElevenLabs.
#
# COURTLISTENER_API_KEY: used primarily on the hooks router, but likely relevant only
# to legal professionals that will be aware what it is for.
#
# TIMING_API_URL: are used on the time router for generating various tasks
# & related to timekeeping, as well as on the notes router for
# TIMING_API_KEY: generating markdown-formatted timeslips. It requires an
# active subscription to the Timing app (macOS or web), but
#                         it's worth noting comes included in the SetApp subscription
# bundle, for the same price, last I checked, as subscribing to
# Timing alone. If you have a Mac and somehow don't know this
# already, SetApp is an utterly insane value. I pay $15/mo for
# apps that I would otherwise pay ~$100/mo for if subscribing
# individually. I want to say I wasn't paid to say this, but
# with those savings I almost feel like I was.
#
# MAC_ID: These last three variables are for a specific use case where
# MAC_UN: you want certain commands run, or alerts appearing, on a
# MAC_PW:                 designated macOS computer. The alerts router is designed to
# deliver OS-level notifications to the specified Mac when a
# webhook gets a hit on specified keywords within the payload.
# Setting the MAC_ID to the TS_ID of the target Mac, allows
# the script to readily know whether it itself is the target
# (this is relevant in a load-balancing context), and how to
# reach the target if not — to wit, ssh using MagicDNS.

88
sijapi/logs.py Normal file
View file

@ -0,0 +1,88 @@
import os
import sys
import logging
from logging.handlers import RotatingFileHandler
from colorama import Fore, Back, Style, init as colorama_init
import traceback
# Force colorama to initialize for the current platform
colorama_init(autoreset=True, strip=False, convert=True)
class ColorFormatter(logging.Formatter):
    """Formatter that wraps each record's rendered text in a level-specific ANSI color."""

    # Log level -> colorama color sequence prepended to the message.
    COLOR_MAP = {
        logging.DEBUG: Fore.CYAN,
        logging.INFO: Fore.GREEN,
        logging.WARNING: Fore.YELLOW,
        logging.ERROR: Fore.RED,
        logging.CRITICAL: Fore.MAGENTA + Back.WHITE,
    }

    def format(self, record):
        """Render the record normally, then colorize by level; unknown levels stay plain."""
        prefix = self.COLOR_MAP.get(record.levelno, '')
        text = super().format(record)
        return f"{prefix}{text}{Style.RESET_ALL}"
class Logger:
    """Wrapper around stdlib `logging` providing colorized console output plus
    rotating file logs under a given directory."""

    def __init__(self, name, logs_dir):
        # Directory where the rotating `app.log` files will be written.
        self.logs_dir = logs_dir
        self.logger = logging.getLogger(name)
        # Capture everything at the logger; each handler filters per-destination.
        self.logger.setLevel(logging.DEBUG)

    def setup_from_args(self, args):
        """Attach file and console handlers based on parsed CLI flags.

        Args:
            args: argparse namespace; `args.debug` toggles DEBUG-level console
                  output. File output is always DEBUG regardless.
        """
        # IDIOM FIX: exist_ok=True replaces the racy exists()/makedirs() pair.
        os.makedirs(self.logs_dir, exist_ok=True)

        # File handler: rotate at ~2 MB, keep 10 backups.
        handler_path = os.path.join(self.logs_dir, 'app.log')
        file_handler = RotatingFileHandler(handler_path, maxBytes=2000000, backupCount=10)
        file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        file_handler.setLevel(logging.DEBUG)

        # Console handler: explicitly stdout so colorama's wrapping applies.
        console_handler = logging.StreamHandler(sys.stdout)
        console_formatter = ColorFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        console_handler.setFormatter(console_formatter)
        console_handler.setLevel(logging.DEBUG if args.debug else logging.INFO)

        self.logger.addHandler(file_handler)
        self.logger.addHandler(console_handler)

        # Emit one message per level so the color mapping can be verified visually.
        self.logger.debug("Debug message (should be Cyan)")
        self.logger.info("Info message (should be Green)")
        self.logger.warning("Warning message (should be Yellow)")
        self.logger.error("Error message (should be Red)")
        self.logger.critical("Critical message (should be Magenta on White)")

    def get_logger(self):
        """Return the underlying stdlib logger instance."""
        return self.logger
# Manual test harness: run `python logs.py [--debug]` to eyeball the colors.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    logger = Logger("test", "logs")
    logger.setup_from_args(args)
    test_logger = logger.get_logger()

    # Dump the environment variables that commonly influence terminal color support.
    for env_var in ('FORCE_COLOR', 'NO_COLOR', 'TERM', 'PYCHARM_HOSTED', 'PYTHONIOENCODING'):
        print(f"{env_var}:", os.environ.get(env_var))

    test_logger.debug("This is a debug message")
    test_logger.info("This is an info message")
    test_logger.warning("This is a warning message")
    test_logger.error("This is an error message")
    test_logger.critical("This is a critical message")

165
sijapi/routers/asr.py Normal file
View file

@ -0,0 +1,165 @@
'''
Automatic Speech Recognition module relying on the `whisper_cpp` implementation of OpenAI's Whisper model.
Depends on:
LOGGER, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR
Notes:
Performs exceptionally well on Apple Silicon. Other devices will benefit from future updates to optionally use `faster_whisper`, `insanely_faster_whisper`, and/or `whisper_jax`.
'''
from fastapi import APIRouter, HTTPException, Form, UploadFile, File
from pydantic import BaseModel, Field
from typing import Optional
import tempfile
from fastapi.responses import JSONResponse, FileResponse
from pydantic import BaseModel, HttpUrl
from whisperplus.pipelines import mlx_whisper
from youtube_dl import YoutubeDL
from urllib.parse import unquote
import subprocess
import os
import uuid
from threading import Thread
import multiprocessing
import asyncio
import subprocess
import tempfile
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR, MAX_CPU_CORES
asr = APIRouter()
class TranscribeParams(BaseModel):
    """Options controlling a whisper.cpp transcription run.

    Field names map onto whisper.cpp CLI flags (see transcribe_audio for the
    exact mapping); unset optional fields are omitted from the command line.
    """
    # Model name; values outside WHISPER_CPP_MODELS fall back to 'small'.
    model: str = Field(default="small")
    # Emit SRT subtitles (-osrt) instead of plain text.
    output_srt : Optional[bool] = Field(default=False)
    # Source language code (-l); if unset, detect_language may apply instead.
    language : Optional[str] = Field(None)
    # Split transcript on word boundaries (-sow).
    split_on_word : Optional[bool] = Field(default=False)
    # Sampling temperature (-tp); only passed when > 0.
    temperature : Optional[float] = Field(default=0)
    # Temperature increment on fallback (-tpi).
    temp_increment : Optional[int] = Field(None)
    # Translate output to English (-tr).
    translate : Optional[bool] = Field(default=False)
    # Speaker diarization (-di).
    diarize : Optional[bool] = Field(default=False)
    # tinydiarize mode (-tdrz).
    tiny_diarize : Optional[bool] = Field(default=False)
    # Disable temperature fallback (-nf).
    no_fallback : Optional[bool] = Field(default=False)
    # Emit JSON output (-oj); ignored when output_srt is set.
    output_json : Optional[bool] = Field(default=False)
    # Auto-detect language (-dl); only used when `language` is unset.
    detect_language : Optional[bool] = Field(default=False)
    # Dynamic time warping preset (--dtw).
    dtw : Optional[str] = Field(None)
    # Worker thread count (-t); clamped to [1, MAX_CPU_CORES] downstream.
    threads : Optional[int] = Field(None)
from urllib.parse import unquote
import json
@asr.post("/asr")
@asr.post("/transcribe")
@asr.post("/v1/audio/transcription")
async def transcribe_endpoint(
    file: UploadFile = File(...),
    params: str = Form(...)
):
    """Transcribe an uploaded audio file with whisper.cpp.

    `params` is a URL-encoded JSON object whose keys match TranscribeParams.
    Returns whisper.cpp's output (text, SRT, or JSON per the params).
    Raises HTTP 400 on malformed params.
    """
    try:
        # Decode the URL-encoded string, parse it, and validate via pydantic.
        decoded_params = unquote(params)
        parameters_dict = json.loads(decoded_params)
        parameters = TranscribeParams(**parameters_dict)
    except json.JSONDecodeError as json_err:
        raise HTTPException(status_code=400, detail=f"Invalid JSON: {str(json_err)}")
    except Exception as err:
        raise HTTPException(status_code=400, detail=f"Error parsing parameters: {str(err)}")

    # Spool the upload to disk so ffmpeg/whisper.cpp can read it by path.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(await file.read())
        temp_file_path = temp_file.name
    try:
        transcription = await transcribe_audio(file_path=temp_file_path, params=parameters)
    finally:
        # BUG FIX: the original never removed the NamedTemporaryFile created
        # with delete=False, leaking one temp file per request.
        try:
            os.remove(temp_file_path)
        except OSError:
            pass
    return transcription
async def transcribe_audio(file_path, params: TranscribeParams):
    """
    Run whisper.cpp on an audio file and return its stdout as a string.

    The input is converted to 16 kHz mono WAV first, then each field of
    *params* is translated into the matching whisper.cpp CLI flag.
    Raises Exception when the subprocess exits non-zero.
    """
    file_path = convert_to_wav(file_path)
    # Fall back to 'small' when the requested model isn't installed locally.
    model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
    model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'
    command = [str(WHISPER_CPP_DIR / 'build' / 'bin' / 'main')]
    command.extend(['-m', str(model_path)])
    # Clamp the thread count to [1, MAX_CPU_CORES].
    command.extend(['-t', str(max(1, min(params.threads or MAX_CPU_CORES, MAX_CPU_CORES)))])
    command.extend(['-np'])  # Always enable no-prints
    if params.split_on_word:
        command.append('-sow')
    if params.temperature > 0:
        command.extend(['-tp', str(params.temperature)])
    if params.temp_increment:
        command.extend(['-tpi', str(params.temp_increment)])
    # An explicit language wins; otherwise optionally ask whisper.cpp to detect it.
    if params.language:
        command.extend(['-l', params.language])
    elif params.detect_language:
        command.append('-dl')
    if params.translate:
        command.append('-tr')
    if params.diarize:
        command.append('-di')
    if params.tiny_diarize:
        command.append('-tdrz')
    if params.no_fallback:
        command.append('-nf')
    # Output format: SRT beats JSON; plain text otherwise, with timestamps
    # suppressed (-nt).
    if params.output_srt:
        command.append('-osrt')
    elif params.output_json:
        command.append('-oj')
    else:
        command.append('-nt')
    if params.dtw:
        command.extend(['--dtw', params.dtw])
    command.extend(['-f', file_path])
    DEBUG(f"Command: {command}")
    proc = await asyncio.create_subprocess_exec(
        *command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        raise Exception(f"Error running command: {stderr.decode()}")
    result = stdout.decode().strip()
    DEBUG(f"Result: {result}")
    return result
def convert_to_wav(file_path: str):
    """Transcode any audio file to 16 kHz mono 16-bit PCM WAV (whisper.cpp input format)."""
    target = os.path.join(ASR_DIR, f"{uuid.uuid4()}.wav")
    ffmpeg_cmd = [
        "ffmpeg", "-y",
        "-i", file_path,
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        target,
    ]
    subprocess.run(ffmpeg_cmd, check=True)
    return target
def download_from_youtube(url: str):
    """
    Download a video's audio track as MP3 and convert it to WAV for transcription.

    Returns the path of the converted WAV file inside ASR_DIR.
    NOTE(review): youtube_dl's FFmpegExtractAudio postprocessor can rewrite the
    output extension -- confirm the final file really lands at `temp_file`.
    """
    temp_file = os.path.join(ASR_DIR, f"{uuid.uuid4()}.mp3")
    ytdl_opts = {
        'outtmpl': temp_file,  # download target template
        'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '192'}],
        'nooverwrites': True
    }
    with YoutubeDL(ytdl_opts) as ydl:
        ydl.download([url])
    return convert_to_wav(temp_file)
def format_srt_timestamp(seconds: float):
    """Convert a duration in seconds to an SRT timestamp string (HH:MM:SS,mmm)."""
    total_ms = round(seconds * 1000.0)
    hrs, rem = divmod(total_ms, 3_600_000)
    mins, rem = divmod(rem, 60_000)
    secs, ms = divmod(rem, 1_000)
    return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"
def write_srt(segments: list, output_file: str):
    """Write segments (dicts with 'start', 'end', 'text') as a SubRip (.srt) file."""
    with open(output_file, 'w') as srt:
        index = 1
        for segment in segments:
            begin = format_srt_timestamp(segment['start'])
            finish = format_srt_timestamp(segment['end'])
            srt.write(f"{index}\n{begin} --> {finish}\n{segment['text']}\n\n")
            index += 1

209
sijapi/routers/cf.py Normal file
View file

@ -0,0 +1,209 @@
'''
IN DEVELOPMENT - Cloudflare + Caddy module. Based on a bash script that's able to rapidly deploy new Cloudflare subdomains on new Caddy reverse proxy configurations, managing everything including restarting Caddy. The Python version needs more testing before actual use.
'''
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from fastapi.responses import PlainTextResponse, JSONResponse
from typing import Optional
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP
import httpx
import asyncio
from asyncio import sleep
import os
cf = APIRouter()
class DNSRecordRequest(BaseModel):
    """Request body for /cf/add_config: the domain to expose and its backend."""
    full_domain: str  # e.g. "sub.example.com" or a bare apex "example.com"
    ip: Optional[str] = None  # backend host for Caddy's reverse_proxy; defaults to "localhost"
    port: str  # backend port (kept as a string; only interpolated into the Caddyfile)
async def get_zone_id(domain: str) -> str:
    """Look up the Cloudflare zone ID for *domain*; raise ValueError when not found."""
    async with httpx.AsyncClient() as client:
        response = await client.get(
            f"{CF_API_BASE_URL}/zones",
            headers={
                "Authorization": f"Bearer {CF_TOKEN}",
                "Content-Type": "application/json",
            },
            params={"name": domain},
        )
        response.raise_for_status()
        payload = response.json()
    # A successful API call can still return an empty result set.
    if not payload['success']:
        errors = ', '.join(err['message'] for err in payload['errors'])
        raise ValueError(f"Cloudflare API returned errors: {errors}")
    if not payload['result']:
        raise ValueError(f"No Zone ID found for domain '{domain}'")
    return payload['result'][0]['id']
async def update_caddyfile(full_domain, caddy_ip, port):
    """
    Append a reverse-proxy site block for *full_domain* to the Caddyfile and
    restart Caddy so the change takes effect.

    NOTE(review): this only ever appends -- re-adding an existing domain
    duplicates its block; confirm that is acceptable.
    """
    caddy_config = f"""
{full_domain} {{
    reverse_proxy {caddy_ip}:{port}
    tls {{
        dns cloudflare {{"$CLOUDFLARE_API_TOKEN"}}
    }}
}}
"""
    with open(CADDYFILE_PATH, 'a') as file:
        file.write(caddy_config)
    # Using asyncio to create subprocess
    proc = await asyncio.create_subprocess_exec("sudo", "systemctl", "restart", "caddy")
    await proc.communicate()
# Retry mechanism for API calls
async def retry_request(url, headers, max_retries=5, backoff_factor=1):
    """
    GET *url* with exponential backoff.

    Returns the successful httpx.Response; raises HTTP 500 after
    *max_retries* consecutive failures.
    """
    for retry in range(max_retries):
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(url, headers=headers)
                response.raise_for_status()
                return response
        # httpx.ConnectTimeout is already a subclass of httpx.HTTPError, so a
        # single except clause covers both of the originally listed cases.
        except httpx.HTTPError as e:
            ERR(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
            # Previously this also slept after the final attempt, delaying the
            # error response for no benefit.
            if retry < max_retries - 1:
                await sleep(backoff_factor * (2 ** retry))
    raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request")
# Helper function to load Caddyfile domains
def load_caddyfile_domains(caddyfile_path=None):
    """
    Parse a Caddyfile and return the list of site-block domains.

    Args:
        caddyfile_path: path of the Caddyfile to read; defaults to the
            configured CADDYFILE_PATH (resolved at call time, so existing
            callers are unaffected).

    A domain is taken from any non-comment line that opens a block with '{'.
    """
    path = CADDYFILE_PATH if caddyfile_path is None else caddyfile_path
    with open(path, 'r') as file:
        caddyfile_content = file.read()
    domains = []
    for line in caddyfile_content.splitlines():
        if line.strip() and not line.startswith('#'):
            if "{" in line:
                domain = line.split("{")[0].strip()
                domains.append(domain)
    return domains
# Endpoint to add new configuration to Cloudflare, Caddyfile, and cf_domains.json
@cf.post("/cf/add_config")
async def add_config(record: DNSRecordRequest):
    """
    Create a proxied Cloudflare A record for the requested (sub)domain and
    append a matching reverse-proxy block to the Caddyfile.

    Raises HTTP 400 when the zone cannot be resolved or the record creation
    is rejected by the Cloudflare API.
    """
    full_domain = record.full_domain
    caddy_ip = record.ip or "localhost"
    port = record.port
    # Extract subdomain and domain; "@" denotes the zone apex.
    parts = full_domain.split(".")
    if len(parts) == 2:
        domain = full_domain
        subdomain = "@"
    else:
        subdomain = parts[0]
        domain = ".".join(parts[1:])
    zone_id = await get_zone_id(domain)
    if not zone_id:
        raise HTTPException(status_code=400, detail=f"Zone ID for {domain} could not be found")
    # API call setup for Cloudflare A record
    endpoint = f"{CF_API_BASE_URL}/zones/{zone_id}/dns_records"
    headers = {
        "Authorization": f"Bearer {CF_TOKEN}",
        "Content-Type": "application/json"
    }
    data = {
        "type": "A",
        "name": subdomain,
        "content": CF_IP,  # records always point at the configured public IP
        "ttl": 120,
        "proxied": True
    }
    async with httpx.AsyncClient() as client:
        response = await client.post(endpoint, headers=headers, json=data)
    result = response.json()
    if not result.get("success", False):
        error_message = result.get("errors", [{}])[0].get("message", "Unknown error")
        error_code = result.get("errors", [{}])[0].get("code", "Unknown code")
        raise HTTPException(status_code=400, detail=f"Failed to create A record: {error_message} (Code: {error_code})")
    # Update Caddyfile
    await update_caddyfile(full_domain, caddy_ip, port)
    return {"message": "Configuration added successfully"}
@cf.get("/cf/list_zones")
async def list_zones_endpoint():
    """Return every Cloudflare zone (with DNS record IDs) as a JSON response."""
    return JSONResponse(await list_zones())
async def list_zones():
    """
    Return all Cloudflare zones visible to the token, shaped as
    {zone_name: {"zone_id": ..., <record_name>: {"dns_id": ...}, ...}}.

    Record names are stored with the zone suffix stripped (the apex keeps its
    full name, since replace() finds no ".zone" suffix there).
    Raises HTTP 400 when any Cloudflare call reports failure.
    """
    endpoint = f"{CF_API_BASE_URL}/zones"
    headers = {
        "Authorization": f"Bearer {CF_TOKEN}",
        "Content-Type": "application/json"
    }
    async with httpx.AsyncClient() as client:  # async http call
        response = await client.get(endpoint, headers=headers)
        response.raise_for_status()
    result = response.json()
    if not result.get("success"):
        raise HTTPException(status_code=400, detail="Failed to retrieve zones from Cloudflare")
    zones = result.get("result", [])
    domains = {}
    for zone in zones:
        zone_id = zone.get("id")
        zone_name = zone.get("name")
        domains[zone_name] = {"zone_id": zone_id}
        # One extra request per zone to enumerate its DNS records.
        records_endpoint = f"{CF_API_BASE_URL}/zones/{zone_id}/dns_records"
        async with httpx.AsyncClient() as client:  # async http call
            records_response = await client.get(records_endpoint, headers=headers)
        records_result = records_response.json()
        if not records_result.get("success"):
            raise HTTPException(status_code=400, detail=f"Failed to retrieve DNS records for zone {zone_name}")
        records = records_result.get("result", [])
        for record in records:
            record_id = record.get("id")
            domain_name = record.get("name").replace(f".{zone_name}", "")
            domains[zone_name].setdefault(domain_name, {})["dns_id"] = record_id
    return domains
@cf.get("/cf/compare_caddy", response_class=PlainTextResponse)
async def crossreference_caddyfile():
    """
    Report (as markdown) domains present only in the Caddyfile versus only in
    Cloudflare DNS.

    Fix: list_zones() stores each DNS record name directly under the zone
    dict, next to the "zone_id" key -- there is no "subdomains" sub-dict, so
    the previous `data.get("subdomains", {})` always yielded an empty
    Cloudflare set.
    """
    cf_domains_data = await list_zones()
    caddyfile_domains = load_caddyfile_domains()
    cf_domains_list = []
    for domain, data in cf_domains_data.items():
        for sub in data:
            if sub == "zone_id":
                continue  # metadata key, not a record name
            # Apex records are stored under the zone's own name (or "@").
            cf_domains_list.append(domain if sub in ("@", domain) else f"{sub}.{domain}")
    caddyfile_domains_set = set(caddyfile_domains)
    cf_domains_set = set(cf_domains_list)
    only_in_caddyfile = caddyfile_domains_set - cf_domains_set
    only_in_cf_domains = cf_domains_set - caddyfile_domains_set
    markdown_output = "# Cross-reference cf_domains.json and Caddyfile\n\n"
    markdown_output += "## Domains only in Caddyfile:\n\n"
    for domain in only_in_caddyfile:
        markdown_output += f"- **{domain}**\n"
    markdown_output += "\n## Domains only in cf_domains.json:\n\n"
    for domain in only_in_cf_domains:
        markdown_output += f"- **{domain}**\n"
    return markdown_output

253
sijapi/routers/email.py Normal file
View file

@ -0,0 +1,253 @@
'''
IN DEVELOPMENT: Email module. Uses IMAP and SMTP login credentials to monitor an inbox and summarize incoming emails that match certain criteria and save the Text-To-Speech converted summaries into a specified "podcast" folder.
UNIMPLEMENTED: AI auto-responder.
'''
from fastapi import APIRouter
import asyncio
from imbox import Imbox
from bs4 import BeautifulSoup
import os
from pathlib import Path
from shutil import move
import tempfile
import re
import ssl
from smtplib import SMTP_SSL
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from datetime import datetime as dt_datetime
from pydantic import BaseModel
from typing import List, Optional, Any
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOME_DIR, DATA_DIR, OBSIDIAN_VAULT_DIR, PODCAST_DIR, IMAP, OBSIDIAN_JOURNAL_DIR, DEFAULT_VOICE, AUTORESPONSE_BLACKLIST, AUTORESPONSE_WHITELIST, AUTORESPONSE_CONTEXT, USER_FULLNAME, USER_BIO, AUTORESPOND, TZ
from sijapi.routers import summarize, tts, llm
from sijapi.utilities import clean_text, assemble_journal_path, localize_dt, extract_text, prefix_lines
email = APIRouter(tags=["private"])
class Contact(BaseModel):
    """A single email address with an optional display name."""
    email: str  # address, e.g. "alice@example.com"
    name: str  # display name; may be empty
class EmailModel(BaseModel):
    """Normalized representation of one received email."""
    sender: str  # sender's address
    recipients: List[Contact]  # everyone the message was addressed to
    datetime_received: dt_datetime  # localized receipt time
    subject: str
    body: str  # plain text extracted from the HTML or plain part
    attachments: Optional[List[Any]] = None  # raw attachment dicts from Imbox
def imap_conn():
    """Open an Imbox IMAP connection using the configured account credentials."""
    encryption = IMAP.imap_encryption
    return Imbox(
        IMAP.host,
        username=IMAP.email,
        password=IMAP.password,
        port=IMAP.imap_port,
        ssl=(encryption == 'SSL'),
        starttls=(encryption == 'STARTTLS'),
    )
def clean_email_content(html_content):
    """Strip HTML tags and collapse runs of whitespace into single spaces."""
    text = BeautifulSoup(html_content, "html.parser").get_text()
    return re.sub(r'[ \t\r\n]+', ' ', text).strip()
async def extract_attachments(attachments) -> List[str]:
    """
    Extract readable text from each attachment.

    Each payload is written to a temp file (preserving its extension so the
    extractor can pick a parser), run through extract_text(), and the temp
    file is removed afterwards.
    """
    attachment_texts = []
    for attachment in attachments:
        attachment_name = attachment.get('filename', 'tempfile.txt')
        _, ext = os.path.splitext(attachment_name)
        ext = ext.lower() if ext else '.txt'
        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_file:
            tmp_file.write(attachment['content'].getvalue())
            tmp_file_path = tmp_file.name
        try:
            attachment_text = await extract_text(tmp_file_path)
            attachment_texts.append(attachment_text)
        finally:
            # Always clean up, even if extraction fails.
            if os.path.exists(tmp_file_path):
                os.remove(tmp_file_path)
    return attachment_texts
async def process_unread_emails(auto_respond: bool = AUTORESPOND, summarize_emails: bool = True, podcast: bool = True):
    """
    Poll the IMAP inbox forever, sleeping 30 seconds between passes.

    For each unread message: optionally summarize it (attachments included)
    and synthesize the summary to speech, save it as a markdown journal note,
    optionally send an LLM-generated auto-response, then mark it seen.
    Errors are logged and polling continues.
    """
    while True:
        try:
            with imap_conn() as inbox:
                unread_messages = inbox.messages(unread=True)
                for uid, message in unread_messages:
                    recipients = [Contact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to]
                    this_email = EmailModel(
                        sender=message.sent_from[0]['email'],
                        datetime_received=localize_dt(message.date),
                        recipients=recipients,
                        subject=message.subject,
                        # Prefer the HTML part; fall back to plain text, then "".
                        body=clean_email_content(message.body['html'][0]) if message.body['html'] else clean_email_content(message.body['plain'][0]) or "",
                        attachments=message.attachments
                    )
                    DEBUG(f"\n\nProcessing email: {this_email.subject}\n\n")
                    md_path, md_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".md")
                    tts_path, tts_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".wav")
                    if summarize_emails:
                        email_content = f'At {this_email.datetime_received}, {this_email.sender} sent an email with the subject line "{this_email.subject}". The email in its entirety reads: \n\n{this_email.body}\n"'
                        if this_email.attachments:
                            attachment_texts = await extract_attachments(this_email.attachments)
                            email_content += "\n—--\n" + "\n—--\n".join([f"Attachment: {text}" for text in attachment_texts])
                        summary = await summarize.summarize_text(email_content)
                        await tts.local_tts(text_content = summary, speed = 1.1, voice = DEFAULT_VOICE, podcast = podcast, output_path = tts_path)
                        if podcast:
                            if PODCAST_DIR.exists():
                                tts.copy_to_podcast_dir(tts_path)
                            else:
                                ERR(f"PODCAST_DIR does not exist: {PODCAST_DIR}")
                        save_email_as_markdown(this_email, summary, md_path, tts_relative)
                    else:
                        save_email_as_markdown(this_email, None, md_path, None)
                    if auto_respond and should_auto_respond(this_email):
                        DEBUG(f"Auto-responding to {this_email.subject}")
                        auto_response_subject = 'Auto-Response Re:' + this_email.subject
                        auto_response_body = await generate_auto_response_body(this_email)
                        DEBUG(f"Auto-response: {auto_response_body}")
                        await send_auto_response(this_email.sender, auto_response_subject, auto_response_body)
                    # Only marked seen after all processing succeeded.
                    inbox.mark_seen(uid)
            await asyncio.sleep(30)
        except Exception as e:
            ERR(f"An error occurred: {e}")
            await asyncio.sleep(30)
def save_email_as_markdown(email: EmailModel, summary: str, md_path: Path, tts_path: Path):
    '''
    Saves an email as a markdown file at md_path.

    Args:
        email (EmailModel): The email object containing email details.
        summary (str): Optional summary; rendered as a quoted callout when given.
        md_path (Path): Destination path of the markdown note.
        tts_path (Path): Optional vault-relative path to the TTS audio file,
            embedded in the note when given.
    '''
    # NOTE: a sanitized `filename` used to be computed here but was never
    # used -- the destination is fully determined by md_path.
    # Guard prefix_lines: callers pass summary=None when summarization is off.
    if summary:
        summary = prefix_lines(summary, '> ')
    # Front matter + a small metadata table, then optional summary/audio/body.
    markdown_content = f'''---
date: {email.datetime_received.strftime('%Y-%m-%d')}
tags:
- email
---
| | | |
| --: | :--: | :--: |
| *received* | **{email.datetime_received.strftime('%B %d, %Y at %H:%M:%S %Z')}** | |
| *from* | **[[{email.sender}]]** | |
| *to* | {', '.join([f'**[[{recipient}]]**' for recipient in email.recipients])} | |
| *subject* | **{email.subject}** | |
'''
    if summary:
        markdown_content += f'''
> [!summary] Summary
> {summary}
'''
    if tts_path:
        markdown_content += f'''
![[{tts_path}]]
'''
    markdown_content += f'''
---
{email.body}
'''
    with open(md_path, 'w', encoding='utf-8') as md_file:
        md_file.write(markdown_content)
    DEBUG(f"Saved markdown to {md_path}")
# System prompt used by generate_auto_response_body() when querying the LLM.
AUTORESPONSE_SYS = "You are a helpful AI assistant that generates personalized auto-response messages to incoming emails."
async def generate_auto_response_body(e: EmailModel, response_style: str = "professional") -> str:
    """
    Ask the local LLM for a personalized auto-response to email *e*.

    Returns the generated reply text, or a canned apology string on failure.
    """
    age = dt_datetime.now(TZ) - e.datetime_received
    prompt = f'''
Please generate a personalized auto-response to the following email. The email is from {e.sender} and was sent {age} ago with the subject line "{e.subject}." You are auto-responding on behalf of {USER_FULLNAME}, who is described by the following short bio (strictly for your context -- do not recite this in the response): "{USER_BIO}." {USER_FULLNAME} is unable to respond himself, because {AUTORESPONSE_CONTEXT}. Everything from here to ~~//END//~~ is the email body.
{e.body}
~~//END//~~
Keep your auto-response {response_style} and to the point, but do aim to make it responsive specifically to the sender's inquiry.
'''
    try:
        response = await llm.query_ollama(prompt, AUTORESPONSE_SYS, 400)
        return response
    except Exception as exc:
        # Renamed from `e`, which shadowed the EmailModel parameter above.
        ERR(f"Error generating auto-response: {str(exc)}")
        return "Thank you for your email. Unfortunately, an error occurred while generating the auto-response. We apologize for any inconvenience."
async def send_auto_response(to_email, subject, body):
    """
    Send *body* as a plain-text auto-response email via SMTP over SSL using
    the configured IMAP/SMTP account. Re-raises on failure so the caller can
    decide whether to retry.
    """
    try:
        message = MIMEMultipart()
        message['From'] = IMAP.email # smtp_username
        message['To'] = to_email
        message['Subject'] = subject
        message.attach(MIMEText(body, 'plain'))
        # DEBUG(f"Attempting to send auto_response to {to_email} concerning {subject}. We will use {IMAP.host}:{IMAP.smtp_port}, un: {IMAP.email}, pw: {IMAP.password}")
        try:
            DEBUG(f"Initiating attempt to send auto-response via SMTP at {IMAP.host}:{IMAP.smtp_port}...")
            # NOTE(review): certificate verification is disabled here -- this
            # accepts any server certificate. Confirm this is intentional.
            context = ssl._create_unverified_context()
            with SMTP_SSL(IMAP.host, IMAP.smtp_port, context=context) as server:
                server.login(IMAP.email, IMAP.password)
                DEBUG(f"Successfully logged in to {IMAP.host} at {IMAP.smtp_port} as {IMAP.email}. Attempting to send email now.")
                server.send_message(message)
            INFO(f"Auto-response sent to {to_email} concerning {subject}")
        except Exception as e:
            ERR(f"Failed to send auto-response email to {to_email}: {e}")
            raise e
    except Exception as e:
        ERR(f"Error in preparing/sending auto-response: {e}")
        raise e
def should_auto_respond(email: EmailModel) -> bool:
    """
    Decide whether *email* qualifies for an automatic reply.

    With a whitelist configured: respond only when a whitelist item matches
    and no blacklist item does. Without a whitelist: respond unless a
    blacklist item matches.
    """
    def matches_list(item: str, msg: EmailModel) -> bool:
        # Items containing '@' match against the sender address; anything else
        # matches case-insensitively against subject or body.
        if '@' in item:
            return item in msg.sender
        needle = item.lower()
        return needle in msg.subject.lower() or needle in msg.body.lower()
    if AUTORESPONSE_WHITELIST:
        for allowed in AUTORESPONSE_WHITELIST:
            if not matches_list(allowed, email):
                continue
            blocked_items = AUTORESPONSE_BLACKLIST or []
            if any(matches_list(blocked, email) for blocked in blocked_items):
                return False
            return True
        return False
    blocked_items = AUTORESPONSE_BLACKLIST or []
    if any(matches_list(blocked, email) for blocked in blocked_items):
        return False
    return True
@email.on_event("startup")
async def startup_event():
    """Kick off the background inbox-polling loop when the app starts."""
    # Fire-and-forget: process_unread_emails() loops forever on its own.
    asyncio.create_task(process_unread_emails())

973
sijapi/routers/ig.py Normal file
View file

@ -0,0 +1,973 @@
'''
IN DEVELOPMENT: Instagram AI bot module.
'''
from fastapi import APIRouter, UploadFile
import os
import io
import copy
import re
import jwt
import json
from tqdm import tqdm
import pyotp
import time
import pytz
import requests
import tempfile
import random
import subprocess
import urllib.request
import uuid
from fastapi import APIRouter
from time import sleep
from datetime import timedelta, datetime as date
from PIL import Image
from pydantic import BaseModel
from typing import Dict, List, Optional
import instagrapi
from instagrapi import Client as igClient
from instagrapi.types import UserShort
from urllib.parse import urlparse
from instagrapi.exceptions import LoginRequired as ClientLoginRequiredError
import json
from ollama import Client as oLlama
from sd import sd
from dotenv import load_dotenv
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, COMFYUI_DIR
import io
from io import BytesIO
import base64
ig = APIRouter()
class IG_Request(BaseModel):
    """Options controlling one run of the Instagram bot."""
    file: Optional[UploadFile] = None  # upload a particular file to Instagram
    profile: Optional[str] = None  # specify the profile account to use (uses the shortnames defined per folders and the config file)
    local_only: Optional[bool] = False  # overrides all other settings to ensure images are generated locally and stay local
    openai: Optional[str] = None  # OpenAI API key; if included, will rely on it for DALL-E, GPT-4, and GPT-4-Vision unless otherwise overridden
    llm: Optional[str] = "llama3"  # if a valid OpenAI model name is provided, it will be used; otherwise it will attempt to match to an Ollama model (if one exists)
    i2t: Optional[str] = "llava"  # set to GPT-4-Vision to use the OpenAI image-2-text model, otherwise this will attempt to match to a vision-capable Ollama model
    t2i: Optional[str] = None  # set to DALL-E to use the OpenAI model, or use it to override the StableDiffusion workflow that's otherwise selected. Leave blank to use defaults per the config file
    ig_post: Optional[str] = True  # if given a value, will use this as the category of post; if given no value, will use all categories unless ig_comment_only is enabled
    ig_comment: Optional[str] = None  # if given a value, will use this as the category of comment; if given no value, will use all categories unless ig_post_only is enabled
    ig_comment_user: Optional[str] = None  # target a particular user for comments
    ig_comment_url: Optional[str] = None  # target a particular ig url for comments
    ghost_post: Optional[bool] = True  # enable posting to Ghost
    sleep_short: Optional[int] = 5  # average duration of short intervals (a few seconds is adequate; this is to simulate doomscrolling latency)
    sleep_long: Optional[int] = 180  # average duration of long intervals (this should be about a minute at least; it simulates the time it takes to write a comment or prepare a post)
# Profile/behavior knobs read from the environment at import time.
IG_PROFILE = os.getenv("IG_PROFILE")
IG_SHORT_SLEEP = int(os.getenv("IG_SHORT_SLEEP", 5))
IG_LONG_SLEEP = int(os.getenv("IG_LONG_SLEEP", 180))
IG_POST_GHOST = os.getenv("IG_POST_GHOST")
IG_VISION_LLM = os.getenv("IG_VISION_LLM")
IG_PROMPT_LLM = os.getenv("IG_PROMPT_LLM")
IG_IMG_GEN = os.getenv("IG_IMG_GEN", "ComfyUI")
IG_OUTPUT_PLATFORMS = os.getenv("IG_OUTPUT_PLATFORMS", "ig,ghost,obsidian").split(',')
SD_WORKFLOWS_DIR = os.path.join(COMFYUI_DIR, 'workflows')
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output'
# NOTE(review): BASE_DIR and PROFILE are not defined or imported anywhere in
# the visible part of this module -- the next two lines would raise NameError
# at import time. Confirm where they are meant to come from.
IG_PROFILES_DIR = os.path.join(BASE_DIR, 'profiles')
IG_PROFILE_DIR = os.path.join(IG_PROFILES_DIR, PROFILE)
IG_IMAGES_DIR = os.path.join(IG_PROFILE_DIR, 'images')
IG_PROFILE_CONFIG_PATH = os.path.join(IG_PROFILE_DIR, f'config.json')
IG_VIEWED_IMAGES_DIR = os.path.join(IG_PROFILE_DIR, 'downloads')
# The per-profile JSON config drives credentials, prompts, and hashtags.
with open(IG_PROFILE_CONFIG_PATH, 'r') as config_file:
    PROFILE_CONFIG = json.load(config_file)
if not os.path.exists(IG_IMAGES_DIR):
    os.makedirs(IG_IMAGES_DIR )
OPENAI_API_KEY=PROFILE_CONFIG.get("openai_key")
###################
### VALIDATION ###
##################
# NOTE(review): `args` is never defined in this module (it reads like a
# leftover argparse namespace from a CLI version) -- these checks would raise
# NameError if this file were imported as-is.
if args.profile and args.posttype and not args.custompost and not args.posttype in PROFILE_CONFIG["posts"]:
    print ("ERROR: NO SUCH POST TYPE IS AVAILABLE FOR THIS PROFILE.")
if args.profile and args.commenttype and not args.commenttype in PROFILE_CONFIG["comments"]:
    print ("ERROR: NO SUCH COMMENT TYPE IS AVAILABLE FOR THIS PROFILE.")
####################
### CLIENT SETUP ###
####################
cl = igClient(request_timeout=1)  # instagrapi client; a 1 s timeout is aggressive -- TODO confirm
# NOTE(review): OpenAI is not imported in the visible part of this module;
# this line would raise NameError at import time.
IMG_GEN = OpenAI(api_key=OPENAI_API_KEY)
IMG_MODEL = "dall-e-3"
COMFYUI_URL = "http://localhost:8188"
CLIENT_ID = str(uuid.uuid4())  # unique id for the ComfyUI websocket/session
###############################
### INSTAGRAM & GHOST SETUP ###
###############################
IG_USERNAME = PROFILE_CONFIG.get("ig_name")
IG_PASSWORD = PROFILE_CONFIG.get("ig_pass")
IG_SECRET_KEY = PROFILE_CONFIG.get("ig_2fa_secret")  # TOTP seed for Instagram 2FA login
IG_SESSION_PATH = os.path.join(IG_PROFILE_DIR, f'credentials.json')
GHOST_API_URL=PROFILE_CONFIG.get("ghost_admin_url")
GHOST_API_KEY=PROFILE_CONFIG.get("ghost_admin_api_key")
GHOST_CONTENT_KEY=PROFILE_CONFIG.get("ghost_content_key")
########################
### LLM PROMPT SETUP ###
########################
IMG_PROMPT_SYS = PROFILE_CONFIG.get("img_prompt_sys")
IMG_DESCRIPTION_SYS = PROFILE_CONFIG.get("img_description_sys")
COMMENT_PROMPT_SYS = PROFILE_CONFIG.get("img_comment_sys")
HASHTAGS = PROFILE_CONFIG.get("preferred_hashtags", [])
# NOTE(review): `args` is undefined here as well (see the validation section).
IMAGE_URL = args.image_url
rollover_time = 1702605780  # epoch seconds; purpose not documented -- TODO confirm
COMPLETED_MEDIA_LOG = os.path.join(IG_PROFILE_DIR, f'completed-media.txt')
TOTP = pyotp.TOTP(IG_SECRET_KEY)
SHORT = args.shortsleep
LONG = args.longsleep
def follow_by_username(username) -> bool:
    """Follow *username*; return True on success, False otherwise."""
    target_id = cl.user_id_from_username(username)
    sleep(SHORT)
    return cl.user_follow(target_id)
def unfollow_by_username(username) -> bool:
    """Unfollow *username*; return True on success, False otherwise."""
    target_id = cl.user_id_from_username(username)
    sleep(SHORT)
    return cl.user_unfollow(target_id)
def get_poster_of_post(shortcode):
    """Return the username of the account that posted the media *shortcode*."""
    info = cl.media_info_by_shortcode(shortcode)
    return info.user.username
def get_followers(amount: int = 0) -> Dict[int, UserShort]:
    """
    Get followers, return Dict of user_id and User object.

    NOTE(review): amount=0 presumably means "all" per instagrapi -- confirm.
    """
    return cl.user_followers(cl.user_id, amount=amount)
def get_followers_usernames(amount: int = 0) -> List[str]:
    """Return the usernames of the bot account's followers."""
    follower_map = cl.user_followers(cl.user_id, amount=amount)
    sleep(SHORT)
    return [short.username for short in follower_map.values()]
def get_following(amount: int = 0) -> Dict[int, UserShort]:
    """
    Get bot's followed users, return Dict of user_id and User object.
    """
    sleep(SHORT)
    return cl.user_following(cl.user_id, amount=amount)
def get_user_media(username, amount=30):
    """Fetch up to *amount* recent posts for a user, keeping only photos (media_type == 1)."""
    DEBUG(f"Fetching recent media for {username}...")
    uid = cl.user_id_from_username(username)
    photos = []
    for item in cl.user_medias(uid, amount):
        sleep(SHORT)
        if item.media_type == 1:
            photos.append(item)
    return photos
def get_user_image_urls(username, amount=30) -> List[str]:
    """Return thumbnail URLs of a user's recent photo posts."""
    DEBUG(f"Fetching recent media URLs for {username}...")
    uid = cl.user_id_from_username(username)
    thumbnails = []
    for item in cl.user_medias(uid, amount):
        sleep(SHORT)
        if item.media_type == 1 and item.thumbnail_url:
            thumbnails.append(item.thumbnail_url)
    return thumbnails
def is_valid_url(url):
    """Return True when *url* parses with both a scheme and a network location."""
    try:
        parts = urlparse(url)
    except Exception:
        return False
    return bool(parts.scheme) and bool(parts.netloc)
def get_random_follower():
    """
    Return the username of one randomly chosen follower.

    Fix: the original called cl.get_followers_usernames(), but that helper is
    defined in this module, not on the instagrapi client, so the call raised
    AttributeError.
    """
    followers = get_followers_usernames()
    sleep(SHORT)
    return random.choice(followers)
def get_medias_by_hashtag(hashtag: str, days_ago_max:int = 14, ht_type:str = None, amount:int = args.count):
    """
    Collect at least *amount* filtered medias for one hashtag.

    ht_type selects the "top" or "recent" feed (defaulting to args.commentmode).
    NOTE(review): the default for *amount* references the undefined `args`
    namespace, and the loop never terminates if filtering keeps the count
    below *amount* -- confirm both before use.
    """
    if not ht_type:
        ht_type = args.commentmode
    DEBUG(f"Fetching {ht_type} media for hashtag: {hashtag}")
    ht_medias = []
    while True:
        sleep(SHORT)
        # Over-fetch (10x) because filtering discards most results.
        if ht_type == "top":
            ht_medias.extend(cl.hashtag_medias_top(name=hashtag, amount=amount*10))
        elif ht_type == "recent":
            ht_medias.extend(cl.hashtag_medias_recent(name=hashtag, amount=amount*10))
        filtered_medias = filter_medias(ht_medias, days_ago_max=days_ago_max)
        DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(filtered_medias)}")
        if len(filtered_medias) >= amount:
            DEBUG(f"Desired amount of {amount} filtered media reached.")
            break
    return filtered_medias
def get_medias_from_all_hashtags(days_ago_max=14, ht_type:str = None, amount:int = args.count):
    """
    Collect *amount* filtered medias drawn from randomly chosen preferred
    hashtags (HASHTAGS), fetching in batches of 50.

    NOTE(review): the *amount* default references the undefined `args`
    namespace -- this would raise NameError at import time.
    """
    if not ht_type:
        ht_type = args.commentmode
    DEBUG(f"Fetching {ht_type} media.")
    filtered_medias = []
    while len(filtered_medias) < amount:
        hashtag = random.choice(HASHTAGS)
        DEBUG(f"Using hashtag: {hashtag}")
        fetched_medias = []
        sleep(SHORT)
        if ht_type == "top":
            fetched_medias = cl.hashtag_medias_top(name=hashtag, amount=50) # Fetch a large batch to filter from
        elif ht_type == "recent":
            fetched_medias = cl.hashtag_medias_recent(name=hashtag, amount=50) # Same for recent
        current_filtered_medias = filter_medias(fetched_medias, days_ago_max=days_ago_max)
        filtered_medias.extend(current_filtered_medias)
        DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(current_filtered_medias)}")
        # Trim the list if we've collected more than needed
        if len(filtered_medias) > amount:
            filtered_medias = filtered_medias[:amount]
            DEBUG(f"Desired amount of {amount} filtered media reached.")
            break
        else:
            DEBUG(f"Total filtered media count so far: {len(filtered_medias)}")
    return filtered_medias
def filter_medias(
    medias: List,
    like_count_min=None,
    like_count_max=None,
    comment_count_min=None,
    comment_count_max=None,
    days_ago_max=None,
):
    """
    Keep only medias inside the given like/comment-count bounds and age
    window, excluding anything already logged as completed.
    """
    # Adjust to use your preferred timezone, for example, UTC
    cutoff = date.now(pytz.utc) - timedelta(days=days_ago_max) if days_ago_max else None
    kept = []
    for media in medias:
        if like_count_min is not None and media.like_count < like_count_min:
            continue
        if like_count_max is not None and media.like_count > like_count_max:
            continue
        if comment_count_min is not None and media.comment_count < comment_count_min:
            continue
        if comment_count_max is not None and media.comment_count > comment_count_max:
            continue
        if days_ago_max is not None and not (media.taken_at and media.taken_at > cutoff):
            continue
        # Checked last, matching the original short-circuit order, since it
        # reads the completed-media log file.
        if check_media_in_completed_lists(media):
            continue
        kept.append(media)
    return kept
def add_media_to_completed_lists(media):
    """Record a media's pk in the completed-media log after interacting with it."""
    with open(COMPLETED_MEDIA_LOG, 'a') as log:
        log.write(f"{str(media.pk)}\n")
def check_media_in_completed_lists(media):
    """Return True if this media's pk was previously logged as completed."""
    with open(COMPLETED_MEDIA_LOG, 'r') as log:
        logged_pks = log.read().splitlines()
    return str(media.pk) in logged_pks
def download_and_resize_image(url: str, download_path: str = None, max_dimension: int = 1200) -> str:
    """
    Download an image and shrink it so its longest side is <= max_dimension.

    Returns the local path of the saved image, or None on any failure
    (invalid URL, network error, or image processing error).
    """
    if not isinstance(url, str):
        url = str(url)
    parsed_url = urlparse(url)
    if not download_path or not os.path.isdir(os.path.dirname(download_path)):
        _, temp_file_extension = os.path.splitext(parsed_url.path)
        if not temp_file_extension:
            temp_file_extension = ".jpg" # Default extension if none is found
        # mkstemp replaces the race-prone, deprecated tempfile.mktemp().
        fd, download_path = tempfile.mkstemp(suffix=temp_file_extension, prefix="download_")
        os.close(fd)
    if url and parsed_url.scheme and parsed_url.netloc:
        try:
            os.makedirs(os.path.dirname(download_path), exist_ok=True)
            with requests.get(url) as response:
                response.raise_for_status() # Raises an HTTPError if the response was an error
                image = Image.open(BytesIO(response.content))
                # Resize the image, preserving aspect ratio
                if max(image.size) > max_dimension:
                    image.thumbnail((max_dimension, max_dimension))
                # Save the image, preserving the original format if possible.
                # Pillow registers no "jpg" encoder -- the correct fallback
                # name is "JPEG" (the old "jpg" default raised KeyError).
                image_format = image.format if image.format else "JPEG"
                image.save(download_path, image_format)
                return download_path
        except Exception as e:
            # Handle or log the error as needed
            DEBUG(f"Error downloading or resizing image: {e}")
    return None
def comment_on_user_media(user: str, comment_type: str = "default", amount=5):
    """
    Comment on a user's media.

    Downloads each not-yet-handled photo's thumbnail, generates a comment via
    llava or gpt4v, posts it, and records the media as completed.
    NOTE(review): `args`, `llava`, `gpt4v`, and `encode_image_to_base64` are
    not defined in the visible portion of this module -- confirm they exist.
    """
    comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr']
    medias = get_user_media(user, amount)
    for media in medias:
        if not check_media_in_completed_lists(media):
            sleep(SHORT)
            if media.thumbnail_url and is_valid_url(media.thumbnail_url):
                media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
                if media_path is not None:
                    encoded_media = encode_image_to_base64(media_path)
                    comment_text = llava(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)
                    if comment_text:
                        cl.media_comment(media.pk, comment_text)
                        DEBUG(f"Commented on media: {media.pk}")
                    else:
                        DEBUG(f"Failed to generate comment for media: {media.pk}")
                    # Logged as completed whether or not the comment succeeded.
                    add_media_to_completed_lists(media)
                    sleep(SHORT)
                else:
                    DEBUG(f"We received a nonetype! {media_path}")
            else:
                DEBUG(f"URL for {media.pk} disappeared it seems...")
        else:
            DEBUG(f"Media already interacted with: {media.pk}")
def comment_on_hashtagged_media(comment_type: str = args.commenttype, amount=3, hashtag: str = None):
    """
    Comment on a hashtag's media.

    Picks a hashtag (random from the comment type's configured list when none
    is given), fetches up to `amount` posts from the last 7 days, generates an
    AI comment for each new one, and records the media as completed.
    """
    if not hashtag:
        hashtag = random.choice(PROFILE_CONFIG['comments'][comment_type]['hashtags'])
    medias = get_medias_by_hashtag(hashtag=hashtag, days_ago_max=7, amount=amount)
    for media in medias:
        if not check_media_in_completed_lists(media):
            media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
            comment_text = None
            if media_path and os.path.exists(media_path):
                encoded_media = encode_image_to_base64(media_path)
                # Include the original caption so the model can reference it.
                comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr'] + " For reference, here is the description that was posted with this image: " + media.caption_text
                comment_text = llava(encoded_media, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)
            # NOTE(review): "and False is True" always evaluates False, so the
            # like branch is unreachable -- liking appears deliberately disabled;
            # drop the clause to re-enable it.
            if (PROFILE_CONFIG['comments'][comment_type]['sentiment'] == "positive") and False is True:
                try:
                    like_result = cl.media_like(media)
                    if like_result:
                        DEBUG(f"Liked media: https://instagram.com/p/{media.pk}/")
                except instagrapi.exceptions.FeedbackRequired as e:
                    DEBUG(f"Cannot like media {media.pk}: {str(e)}")
            if comment_text:
                try:
                    cl.media_comment(media.pk, comment_text)
                    DEBUG(f"Commented on media: https://instagram.com/p/{media.pk}/")
                except instagrapi.exceptions.FeedbackRequired as e:
                    DEBUG(f"Cannot comment on media {media.pk}: {str(e)}")
            else:
                DEBUG(f"Failed to generate comment for media: https://instagram.com/p/{media.pk}")
            # Mark as handled whether or not a comment was produced.
            add_media_to_completed_lists(media)
            sleep(SHORT)
        else:
            DEBUG(f"Media already interacted with: {media.pk}")
def comment_on_specific_media(media_url, comment_type: str = "default"):
    """
    Comment on a specific media given its URL.
    """
    media_id = cl.media_pk_from_url(media_url)
    sleep(SHORT)
    media = cl.media_info(media_id)
    sleep(SHORT)
    media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
    encoded_media = encode_image_to_base64(media_path)
    # Feed the original caption to the vision model alongside the configured prompt.
    base_prompt = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr']
    comment_prompt_usr = base_prompt + " For reference, here is the description that was posted with this image: " + media.caption_text
    if args.llava or not args.openai:
        comment_text = llava(encoded_media, comment_prompt_usr)
    else:
        comment_text = gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)
    if not comment_text:
        DEBUG(f"Failed to generate comment for specific media: https://instagram.com/p/{media.pk}/")
        return
    try:
        cl.media_comment(media.pk, comment_text)
        DEBUG(f"Commented on specific media: https://instagram.com/p/{media.pk}/")
    except instagrapi.exceptions.FeedbackRequired as e:
        DEBUG(f"Failed to comment on specific media: https://instagram.com/p/{media.pk}/ due to error: {str(e)}")
def get_image(status_data, key):
    """Extract the filename and subfolder from the status data and read the file.

    Args:
        status_data: ComfyUI job-status payload; images live under
            outputs[key]["images"].
        key: Node key whose first image output should be read.

    Returns:
        Raw bytes of the image file.

    Raises:
        Exception: If no image is listed, a required key is missing, or the
            file is absent on disk.
    """
    try:
        outputs = status_data.get("outputs", {})
        images_info = outputs.get(key, {}).get("images", [])
        if not images_info:
            raise Exception("No images found in the job output.")
        image_info = images_info[0]  # Assuming the first image is the target
        filename = image_info.get("filename")
        subfolder = image_info.get("subfolder", "")  # Default to empty if not present
        file_path = os.path.join(COMFYUI_OUTPUT_DIR, subfolder, filename)
        with open(file_path, 'rb') as file:
            return file.read()
    except KeyError as e:
        raise Exception(f"Failed to extract image information due to missing key: {e}")
    except FileNotFoundError:
        # Bug fix: the old message contained the literal placeholder "(unknown)"
        # instead of naming the missing file.
        raise Exception(f"File {filename} not found at the expected path {file_path}")
def update_prompt(workflow: dict, post: dict, positive: str, found_key=None, path=None):
    """Recursively walk a ComfyUI workflow graph, substituting prompt
    placeholders, randomizing seeds, and resolving sentinel dimensions in place.

    Args:
        workflow: Workflow graph (nested dicts/lists); mutated in place.
        post: Post configuration supplying replacement values.
        positive: Positive-prompt text appended to the API_PPrompt value.
        found_key: One-element list used as an out-parameter across recursive
            calls. Bug fix: the old `found_key=[None]` mutable default leaked
            the previous call's result into later top-level calls; a fresh
            list is now created per top-level call.
        path: Current traversal path, used only for error reporting.

    Returns:
        The key of the SaveImage node whose filename_prefix is 'API_', or None.
    """
    if found_key is None:
        found_key = [None]
    if path is None:
        path = []
    try:
        if isinstance(workflow, dict):
            for key, value in workflow.items():
                current_path = path + [key]
                if isinstance(value, dict):
                    # Remember which SaveImage node will hold the output image.
                    if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
                        found_key[0] = key
                    update_prompt(value, post, positive, found_key, current_path)
                elif isinstance(value, list):
                    # Recursive call with updated path for each item in a list
                    for index, item in enumerate(value):
                        update_prompt(item, post, positive, found_key, current_path + [str(index)])
                if value == "API_PPrompt":
                    workflow[key] = post.get(value, "") + positive
                    DEBUG(f"Updated API_PPrompt to: {workflow[key]}")
                elif value == "API_SPrompt":
                    workflow[key] = post.get(value, "")
                    DEBUG(f"Updated API_SPrompt to: {workflow[key]}")
                elif value == "API_NPrompt":
                    workflow[key] = post.get(value, "")
                    DEBUG(f"Updated API_NPrompt to: {workflow[key]}")
                elif key == "seed" or key == "noise_seed":
                    workflow[key] = random.randint(1000000000000, 9999999999999)
                    DEBUG(f"Updated seed to: {workflow[key]}")
                elif (key == "width" or key == "max_width" or key == "scaled_width" or key == "side_length") and (value == 1023 or value == 1025):
                    # Sentinel sizes 1023/1025 mark nodes whose dimension comes from the post config.
                    workflow[key] = post.get("width", 1024)
                elif (key == "dimension" or key == "height" or key == "max_height" or key == "scaled_height") and (value == 1023 or value == 1025):
                    workflow[key] = post.get("height", 1024)
    except Exception as e:
        DEBUG(f"Error in update_prompt at path {' -> '.join(path)}: {e}")
        raise
    return found_key[0]
def update_prompt_custom(workflow: dict, API_PPrompt: str, API_SPrompt: str, API_NPrompt: str, found_key=None, path=None):
    """Recursively walk a ComfyUI workflow graph, substituting the three
    prompt placeholders with explicit strings and randomizing seeds in place.

    Bug fixes vs. the original:
      * The recursion previously called `update_prompt`, whose signature is
        (workflow, post, positive, found_key, path) -- so API_NPrompt was
        passed as found_key and nested placeholders were never substituted.
        The recursion now calls update_prompt_custom itself.
      * The mutable default `found_key=[None]` leaked state between calls;
        a fresh list is now created per top-level call.

    Args:
        workflow: Workflow graph (nested dicts/lists); mutated in place.
        API_PPrompt: Replacement for "API_PPrompt" placeholder values.
        API_SPrompt: Replacement for "API_SPrompt" placeholder values.
        API_NPrompt: Replacement for "API_NPrompt" placeholder values.
        found_key: One-element list used as an out-parameter across recursion.
        path: Current traversal path, used only for error reporting.

    Returns:
        The key of the SaveImage node whose filename_prefix is 'API_', or None.
    """
    if found_key is None:
        found_key = [None]
    if path is None:
        path = []
    try:
        if isinstance(workflow, dict):
            for key, value in workflow.items():
                current_path = path + [key]
                if isinstance(value, dict):
                    if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
                        found_key[0] = key
                    update_prompt_custom(value, API_PPrompt, API_SPrompt, API_NPrompt, found_key, current_path)
                elif isinstance(value, list):
                    # Recursive call with updated path for each item in a list
                    for index, item in enumerate(value):
                        update_prompt_custom(item, API_PPrompt, API_SPrompt, API_NPrompt, found_key, current_path + [str(index)])
                if value == "API_PPrompt":
                    workflow[key] = API_PPrompt
                    DEBUG(f"Updated API_PPrompt to: {workflow[key]}")
                elif value == "API_SPrompt":
                    workflow[key] = API_SPrompt
                    DEBUG(f"Updated API_SPrompt to: {workflow[key]}")
                elif value == "API_NPrompt":
                    workflow[key] = API_NPrompt
                    DEBUG(f"Updated API_NPrompt to: {workflow[key]}")
                elif key == "seed" or key == "noise_seed":
                    workflow[key] = random.randint(1000000000000, 9999999999999)
                    DEBUG(f"Updated seed to: {workflow[key]}")
                elif (key == "width" or key == "max_width" or key == "scaled_width") and (value == 1023 or value == 1025):
                    workflow[key] = 1024
                elif (key == "dimension" or key == "height" or key == "max_height" or key == "scaled_height") and (value == 1023 or value == 1025):
                    workflow[key] = 1024
    except Exception as e:
        DEBUG(f"Error in update_prompt_custom at path {' -> '.join(path)}: {e}")
        raise
    return found_key[0]
##################################
### IMAGE GENERATION FUNCTIONS ###
##################################
def image_gen(prompt: str, model: str):
    """Generate one standard-quality 1024x1024 image via the IMG_GEN API and
    return the local path of the downloaded (and possibly resized) copy."""
    result = IMG_GEN.images.generate(
        model=model,
        prompt=prompt,
        size="1024x1024",
        quality="standard",
        n=1,
    )
    generated_url = result.data[0].url
    return download_and_resize_image(generated_url)
def queue_prompt(prompt: dict):
    """Submit a workflow graph to ComfyUI's /prompt endpoint and return the
    queued prompt id; raise on any non-200 response."""
    response = requests.post(f"{COMFYUI_URL}/prompt", json={"prompt": prompt, "client_id": CLIENT_ID})
    # Guard clause: surface the server's error body instead of returning junk.
    if response.status_code != 200:
        raise Exception(f"Failed to queue prompt. Status code: {response.status_code}, Response body: {response.text}")
    return response.json().get('prompt_id')
def poll_status(prompt_id):
    """Poll the job status until it's complete and return the status data."""
    # NOTE(review): this definition is immediately shadowed by a byte-identical
    # poll_status defined right below it -- one of the two should be removed.
    start_time = time.time()  # Record the start time
    while True:
        elapsed_time = int(time.time() - start_time)  # Calculate elapsed time in seconds
        status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}")
        # Use \r to return to the start of the line, and end='' to prevent newline
        # NOTE(review): assumes DEBUG accepts a print-style `end` kwarg -- confirm.
        DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='')
        if status_response.status_code != 200:
            raise Exception("Failed to get job status")
        status_data = status_response.json()
        job_data = status_data.get(prompt_id, {})
        if job_data.get("status", {}).get("completed", False):
            DEBUG()
            DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.")
            return job_data
        time.sleep(1)  # poll at 1 Hz
def poll_status(prompt_id):
    """Poll the job status until it's complete and return the status data."""
    started_at = time.time()
    while True:
        elapsed_time = int(time.time() - started_at)
        status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}")
        # \r rewinds to the start of the line; end='' suppresses the newline.
        DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='')
        if status_response.status_code != 200:
            raise Exception("Failed to get job status")
        job_data = status_response.json().get(prompt_id, {})
        job_done = job_data.get("status", {}).get("completed", False)
        if job_done:
            DEBUG()
            DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.")
            return job_data
        time.sleep(1)
################################
### PRIMARY ACTIVE FUNCTIONS ###
################################
def load_post(chosen_post: str = "default"):
    """Return the configuration dict for chosen_post, falling back to a
    weighted-random default when the name is not configured."""
    if chosen_post in PROFILE_CONFIG['posts']:
        DEBUG(f"Loaded post for {chosen_post}")
        return PROFILE_CONFIG['posts'][chosen_post]
    # Unknown post type: pick one by configured frequency weights instead.
    DEBUG(f"Unable to load post for {chosen_post}. Choosing a default post.")
    chosen_post = choose_post(PROFILE_CONFIG['posts'])
    fallback = PROFILE_CONFIG['posts'][chosen_post]
    DEBUG(f"Defaulted to {chosen_post}")
    return fallback
def handle_image_workflow(chosen_post=None):
    """
    Orchestrates the workflow from prompt update, image generation, to either saving the image and description locally
    or posting to Instagram based on the local flag.

    Args:
        chosen_post: Post-type key into PROFILE_CONFIG['posts']; chosen at
            random (frequency-weighted) when omitted.
    """
    if chosen_post is None:
        chosen_post = choose_post(PROFILE_CONFIG['posts'])
    post = load_post(chosen_post)
    # CLI --workflow wins; otherwise pick randomly from the post's workflows.
    workflow_name = args.workflow if args.workflow else random.choice(post['workflows'])
    DEBUG(f"Workflow name: {workflow_name}")
    DEBUG(f"Generating image concept for {chosen_post} and {workflow_name} now.")
    # Local Ollama path unless --openai was requested without --local.
    image_concept = query_ollama(llmPrompt = post['llmPrompt'], max_tokens = 180) if args.local or not args.openai else query_gpt4(llmPrompt = post['llmPrompt'], max_tokens = 180)
    DEBUG(f"Image concept for {chosen_post}: {image_concept}")
    workflow_data = None
    # --fast prefers a "<name>_fast" workflow variant when one exists.
    if args.fast:
        workflow_data = load_json(None, f"{workflow_name}_fast")
    if workflow_data is None:
        workflow_data = load_json(None, workflow_name)
    if args.dalle and not args.local:
        jpg_file_path = image_gen(image_concept, "dall-e-3")
    else:
        # ComfyUI path: inject prompts/seed, queue, poll, then fetch the image.
        saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
        DEBUG(f"Saved file key: {saved_file_key}")
        prompt_id = queue_prompt(workflow_data)
        DEBUG(f"Prompt ID: {prompt_id}")
        status_data = poll_status(prompt_id)
        image_data = get_image(status_data, saved_file_key)
        # Landscape posts are saved larger and at higher JPEG quality.
        if chosen_post == "landscape":
            jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 2880, 100)
        else:
            jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 1440, 90)
    image_aftergen(jpg_file_path, chosen_post)
def handle_custom_image(custom_post: str):
    """
    Orchestrates the workflow from prompt update, image generation, to either saving the image and description locally
    or posting to Instagram based on the local flag.

    Args:
        custom_post: Free-form user text used as the basis for the image concept.
    """
    if args.posttype:
        post = load_post(args.posttype)
        workflow_name = args.workflow if args.workflow else random.choice(post['workflows'])
    else:
        # No post type given: fall back to the "selfie" workflow with an
        # inline default post configuration.
        workflow_name = args.workflow if args.workflow else "selfie"
        post = {
            "API_PPrompt": "",
            "API_SPrompt": "; (((masterpiece))); (beautiful lighting:1), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
            "API_NPrompt": "canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3",
            "Vision_Prompt": "Write an upbeat Instagram description with emojis to accompany this selfie!",
            "frequency": 2,
            "ghost_tags": [
                "aigenerated",
                "stablediffusion",
                "sdxl",
            ],
        }
    workflow_data = load_json(None, workflow_name)
    system_msg = "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words."
    image_concept = query_ollama(system_msg=system_msg, user_msg=custom_post, max_tokens = 180) if args.local or not args.openai else query_gpt4(system_msg=system_msg, user_msg=custom_post, max_tokens = 180)
    DEBUG(f"Image concept: {image_concept}")
    if args.dalle and not args.local:
        jpg_file_path = image_gen(image_concept, "dall-e-3")
    else:
        saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
        DEBUG(f"Saved file key: {saved_file_key}")
        prompt_id = queue_prompt(workflow_data)
        DEBUG(f"Prompt ID: {prompt_id}")
        status_data = poll_status(prompt_id)
        image_data = get_image(status_data, saved_file_key)
    chosen_post = args.posttype if args.posttype else "custom"
    # NOTE(review): in the --dalle branch above, image_data and prompt_id are
    # never assigned, so this call would raise NameError -- confirm intended flow.
    jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 1440, 90)
    encoded_string = encode_image_to_base64(jpg_file_path)
    vision_prompt = f"Write upbeat Instagram description accompany this image, which was created by AI using the following prompt: {image_concept}"
    # NOTE(review): instagram_description is computed here (an extra model call)
    # but never used -- image_aftergen generates its own description.
    instagram_description = llava(encoded_string, vision_prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, vision_prompt, 150)
    image_aftergen(jpg_file_path, chosen_post, )
def image_aftergen(jpg_file_path: str, chosen_post: str = None, post: Dict = None, prompt: str = None):
    """Post-generation pipeline: caption the image with a vision model, write
    description and markdown sidecar files, and optionally set the wallpaper,
    post to Instagram, and publish to Ghost (gated by CLI flags).

    Args:
        jpg_file_path: Path to the generated JPEG.
        chosen_post: Post-type key into PROFILE_CONFIG['posts'], used to look
            up Vision_Prompt and ghost_tags when `prompt`/`post` are omitted.
        post: Optional post config dict overriding the PROFILE_CONFIG lookup.
        prompt: Optional vision prompt; defaults to the post type's Vision_Prompt.
    """
    if chosen_post and not prompt:
        prompt = PROFILE_CONFIG['posts'][chosen_post]['Vision_Prompt']
    encoded_string = encode_image_to_base64(jpg_file_path)
    DEBUG(f"Image successfully encoded from {jpg_file_path}")
    instagram_description = llava(encoded_string, prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, prompt, 150)
    # Strip a single pair of wrapping quotes the model sometimes adds.
    instagram_description = re.sub(r'^["\'](.*)["\']$', r'\1', instagram_description)
    ghost_tags = post['ghost_tags'] if post else PROFILE_CONFIG['posts'][chosen_post]['ghost_tags']
    title_prompt = f"Generate a short 3-5 word title for this image, which already includes the following description: {instagram_description}"
    # Generate img_title based on the condition provided
    img_title = llava(encoded_string, title_prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, title_prompt, 150)
    img_title = re.sub(r'^["\'](.*)["\']$', r'\1', img_title)
    # Save description to file and upload or save locally
    description_filename = jpg_file_path.rsplit('.', 1)[0] + ".txt"
    # NOTE(review): description_filename still carries the full jpg path; if it
    # is absolute, os.path.join ignores IG_IMAGES_DIR entirely -- confirm intent.
    description_path = os.path.join(IG_IMAGES_DIR, description_filename)
    with open(description_path, "w") as desc_file:
        desc_file.write(instagram_description)
    # Initial markdown content creation
    markdown_filename = jpg_file_path.rsplit('.', 1)[0] + ".md"
    markdown_content = f"""# {img_title}
![{img_title}]({jpg_file_path})
---
{instagram_description}
---
Tags: {', '.join(ghost_tags)}
"""
    with open(markdown_filename, "w") as md_file:
        md_file.write(markdown_content)
    DEBUG(f"Markdown file created at {markdown_filename}")
    if args.wallpaper:
        change_wallpaper(jpg_file_path)
        DEBUG(f"Wallpaper changed.")
    if not args.local:
        ig_footer = ""
        if not args.noig:
            post_url = upload_photo(jpg_file_path, instagram_description)
            DEBUG(f"Image posted at {post_url}")
            ig_footer = f"\n<a href=\"{post_url}\">Instagram link</a>"
        if not args.noghost:
            ghost_text = f"{instagram_description}"
            ghost_url = post_to_ghost(img_title, jpg_file_path, ghost_text, ghost_tags)
            DEBUG(f"Ghost post: {ghost_url}\n{ig_footer}")
def choose_post(posts):
    """Pick a post type at random, weighted by each entry's 'frequency' value."""
    weight_total = sum(info['frequency'] for info in posts.values())
    pick = random.randint(1, weight_total)
    cumulative = 0
    for post_type, post_info in posts.items():
        cumulative += post_info['frequency']
        if pick <= cumulative:
            return post_type
def load_json(json_payload, workflow):
    """Load workflow JSON either from a raw payload string (takes priority) or
    from a named workflow file inside SD_WORKFLOWS_DIR."""
    if json_payload:
        return json.loads(json_payload)
    if not workflow:
        raise ValueError("No valid input provided.")
    # Append .json unless the caller already included the extension.
    filename = workflow if workflow.endswith('.json') else f"{workflow}.json"
    workflow_path = os.path.join(SD_WORKFLOWS_DIR, filename)
    with open(workflow_path, 'r') as file:
        return json.load(file)
def save_as_jpg(image_data, prompt_id, chosen_post:str = None, max_size=2160, quality=80):
    """Write raw PNG bytes into the post's image directory, downscale so the
    longest edge is at most max_size, convert to JPEG at the given quality,
    delete the temporary PNG, and return the JPEG path (None on any failure)."""
    chosen_post = chosen_post if chosen_post else "custom"
    category_dir = os.path.join(IG_IMAGES_DIR, chosen_post)
    png_path = os.path.join(category_dir, f"{prompt_id}.png")
    try:
        os.makedirs(category_dir, exist_ok=True)
        # Persist the raw PNG bytes first.
        with open(png_path, 'wb') as file:
            file.write(image_data)
        # Reopen, downscale if needed, and re-encode as JPEG.
        with Image.open(png_path) as img:
            longest_edge = max(img.size)
            if longest_edge > max_size:
                scale = max_size / longest_edge
                img = img.resize(tuple(int(dim * scale) for dim in img.size), Image.Resampling.LANCZOS)
            jpg_path = os.path.join(category_dir, f"{prompt_id}.jpg")
            img.convert('RGB').save(jpg_path, format='JPEG', quality=quality)
        # Drop the intermediate PNG now that the JPEG exists.
        os.remove(png_path)
        return jpg_path
    except Exception as e:
        DEBUG(f"Error processing image: {e}")
        return None
def upload_photo(path, caption, title: str=None):
    """Upload a photo to Instagram with the given caption and return the post
    URL. (`title` is accepted for interface compatibility but unused here.)"""
    DEBUG(f"Uploading photo from {path}...")
    uploaded = cl.photo_upload(path, caption)
    return f"https://www.instagram.com/p/{uploaded.code}/"
def format_duration(seconds):
    """Return a string representing the duration in a human-readable format."""
    # Largest-unit-first guard clauses; thresholds match the original
    # (under 120 s -> seconds, under 6400 s -> minutes, else hours).
    if seconds >= 6400:
        return f"{seconds / 3600:.2f} hr"
    if seconds >= 120:
        return f"{int(seconds // 60)} min"
    return f"{int(seconds)} sec"
########################
### HELPER FUNCTIONS ###
########################
import subprocess
def change_wallpaper(image_path):
    """Set the macOS desktop wallpaper to image_path via Finder/AppleScript.

    Security/robustness fix: the old version interpolated the path into a
    shell string with shell=True, so spaces or shell metacharacters in the
    path could break the command or inject shell code. Passing an argument
    list with the default shell=False removes the shell entirely.
    """
    script = f'tell application "Finder" to set desktop picture to POSIX file "{image_path}"'
    subprocess.run(["osascript", "-e", script])
def sleep(seconds):
    """Sleep for a random amount of time, approximately the given number of seconds."""
    # Jitter the requested duration to roughly 66%-150% of `seconds`.
    lower_bound = seconds * 0.66
    upper_bound = seconds * 1.5
    sleepupto(lower_bound, upper_bound)
def sleepupto(min_seconds, max_seconds=None):
    """Sleep a uniformly random duration while showing a tqdm countdown bar.

    Called with one argument, sleeps between 0 and min_seconds; with two,
    sleeps between min_seconds and max_seconds.
    """
    interval = random.uniform(min_seconds if max_seconds is not None else 0, max_seconds if max_seconds is not None else min_seconds)
    start_time = time.time()
    end_time = start_time + interval
    with tqdm(total=interval, desc=f"Sleeping for {format_duration(interval)}", unit=" sec", ncols=75, bar_format='{desc}: {bar} {remaining}') as pbar:
        while True:
            current_time = time.time()
            elapsed_time = current_time - start_time
            remaining_time = end_time - current_time
            if elapsed_time >= interval:
                break
            # Sleep in <=1 s increments so the progress bar updates smoothly.
            duration = min(1, interval - elapsed_time)  # Adjust sleep time to not exceed interval
            time.sleep(duration)
            pbar.update(duration)
            # Update remaining time display
            pbar.set_postfix_str(f"{format_duration(remaining_time)} remaining")
########################
### GHOST FUNCTIONS ###
########################
def generate_jwt_token():
    """Build a short-lived (5 min) JWT for the Ghost Admin API from
    GHOST_API_KEY, which has the form 'key_id:hex_secret'."""
    key_id, key_secret = GHOST_API_KEY.split(':')
    # NOTE(review): `date.now()` implies `date` is an alias for datetime.datetime
    # (datetime.date has no .now()) -- confirm against the module's imports.
    iat = int(date.now().timestamp())
    exp = iat + 5 * 60  # Token expiration time set to 5 minutes from now for consistency with the working script
    payload = {
        'iat': iat,
        'exp': exp,
        'aud': '/admin/'  # Adjusted to match the working script
    }
    # The secret is hex-encoded; Ghost expects it decoded, with the key id in the header.
    token = jwt.encode(payload, bytes.fromhex(key_secret), algorithm='HS256', headers={'kid': key_id})
    return token.decode('utf-8') if isinstance(token, bytes) else token  # Ensure the token is decoded to UTF-8 string
def post_to_ghost(title, image_path, html_content, ghost_tags):
    """Publish a post to Ghost: upload the image, build a mobiledoc wrapping
    the image plus html_content, create the post, and return its public URL.

    Raises:
        requests.HTTPError: If the image upload or post creation fails.
    """
    jwt_token = generate_jwt_token()
    ghost_headers = {'Authorization': f'Ghost {jwt_token}'}
    # Upload the image to Ghost
    with open(image_path, 'rb') as f:
        # NOTE(review): 'image/jpg' is not a registered MIME type ('image/jpeg'
        # is) -- confirm Ghost accepts it before changing.
        files = {'file': (os.path.basename(image_path), f, 'image/jpg')}
        image_response = requests.post(f"{GHOST_API_URL}/images/upload/", headers=ghost_headers, files=files)
        image_response.raise_for_status()  # Ensure the request was successful
        image_url = image_response.json()['images'][0]['url']
    # Prepare the post content
    updated_html_content = f'<img src="{image_url}" alt="Image"/><hr/> {html_content}'
    # Minimal mobiledoc document containing a single raw-HTML card.
    mobiledoc = {
        "version": "0.3.1",
        "atoms": [],
        "cards": [["html", {"cardName": "html", "html": updated_html_content}]],
        "markups": [],
        "sections": [[10, 0]]
    }
    mobiledoc = json.dumps(mobiledoc)
    post_data = {
        'posts': [{
            'title': title,
            'mobiledoc': mobiledoc,
            'status': 'published',
            'tags': ghost_tags
        }]
    }
    # Create a new post
    post_response = requests.post(f"{GHOST_API_URL}/posts/", json=post_data, headers=ghost_headers)
    post_response.raise_for_status()
    post_url = post_response.json()['posts'][0]['url']
    return post_url
########################################################
@ig.post("/ig/flow")
async def ig_flow_endpoint(new_session: bool = False):
    """Log into Instagram, reusing the saved session unless new_session is True.

    Waits out the tail of the current TOTP window so a freshly generated
    one-time code cannot expire mid-login.
    """
    # NOTE(review): `date.now()` implies `date` aliases datetime.datetime -- confirm.
    current_unix_time = int(date.now().timestamp())
    time_since_rollover = current_unix_time - rollover_time
    time_remaining = 30 - (time_since_rollover % 30)  # TOTP codes rotate every 30 s
    if time_remaining < 4:
        DEBUG("Too close to end of TOTP counter. Waiting.")
        sleepupto(5, 5)
    if not new_session and os.path.exists(IG_SESSION_PATH):
        cl.load_settings(IG_SESSION_PATH)
        DEBUG("Loaded past session.")
    elif new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now()):
        # Persist the fresh session for future non-new_session calls.
        cl.dump_settings(IG_SESSION_PATH)
        DEBUG("Logged in and saved new session.")
    else:
        raise Exception(f"Failed to login as {IG_USERNAME}.")

484
sijapi/routers/llm.py Normal file
View file

@ -0,0 +1,484 @@
#routers/llm.py
from fastapi import APIRouter, HTTPException, Request, Response
from fastapi.responses import StreamingResponse, JSONResponse
from starlette.responses import StreamingResponse
from datetime import datetime as dt_datetime
from dateutil import parser
from typing import List, Dict, Any, Union
from pydantic import BaseModel, root_validator, ValidationError
import aiofiles
import os
import glob
import chromadb
from openai import OpenAI
import uuid
import json
import base64
from pathlib import Path
import ollama
from ollama import AsyncClient as Ollama, list as OllamaList
import aiofiles
import time
import asyncio
from pathlib import Path
from fastapi import FastAPI, Request, HTTPException, APIRouter
from fastapi.responses import JSONResponse, StreamingResponse
from dotenv import load_dotenv
from sijapi import BASE_DIR, DATA_DIR, LOGS_DIR, CONFIG_DIR, LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi.utilities import convert_to_unix_time, sanitize_filename
llm = APIRouter()

# Initialize chromadb client
# Module-level side effect: creates an in-memory chromadb client and rebinds
# the imported OBSIDIAN_CHROMADB_COLLECTION name to a fresh "obsidian" collection.
client = chromadb.Client()
OBSIDIAN_CHROMADB_COLLECTION = client.create_collection("obsidian")
# Function to read all markdown files in the folder
def read_markdown_files(folder: Path):
file_paths = glob.glob(os.path.join(folder, "*.md"))
documents = []
for file_path in file_paths:
with open(file_path, 'r', encoding='utf-8') as file:
documents.append(file.read())
return documents, file_paths
# Read markdown files and generate embeddings
# Module-level side effect: on import, every markdown file in DOC_DIR is
# embedded via Ollama and inserted into the chromadb collection, with the
# file path serving as the vector id.
documents, file_paths = read_markdown_files(DOC_DIR)
for i, doc in enumerate(documents):
    # One embedding call per document.
    response = ollama.embeddings(model="mxbai-embed-large", prompt=doc)
    embedding = response["embedding"]
    OBSIDIAN_CHROMADB_COLLECTION.add(
        ids=[file_paths[i]],
        embeddings=[embedding],
        documents=[doc]
    )
# Function to retrieve the most relevant document given a prompt
@llm.get("/retrieve_document/{prompt}")
async def retrieve_document(prompt: str):
    """Embed the prompt and return the single most similar markdown document
    from the Obsidian chromadb collection."""
    embedded = ollama.embeddings(
        prompt=prompt,
        model="mxbai-embed-large"
    )
    hits = OBSIDIAN_CHROMADB_COLLECTION.query(
        query_embeddings=[embedded["embedding"]],
        n_results=1
    )
    best_match = hits['documents'][0][0]
    return {"document": best_match}
# Function to generate a response using RAG
@llm.get("/generate_response/{prompt}")
async def generate_response(prompt: str):
    """RAG endpoint: retrieve the best-matching document, then answer the
    prompt with llama2 using that document as context."""
    data = await retrieve_document(prompt)
    completion = ollama.generate(
        model="llama2",
        prompt=f"Using this data: {data['document']}. Respond to this prompt: {prompt}"
    )
    return {"response": completion['response']}
async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, max_tokens: int = 200):
    """Send a system+user message pair to the default local LLM and return the
    reply text; returns None when the response carries no message content."""
    messages = [
        {"role": "system", "content": sys},
        {"role": "user", "content": usr},
    ]
    LLM = Ollama()
    response = await LLM.chat(model=DEFAULT_LLM, messages=messages, options={"num_predict": max_tokens})
    DEBUG(response)
    if "message" not in response:
        DEBUG("No choices found in response")
        return None
    if "content" in response["message"]:
        return response["message"]["content"]
    # Message present but content missing: fall through silently (returns None),
    # matching the original control flow.
def is_vision_request(content):
    # NOTE(review): dead stub -- this binding is replaced at import time by the
    # full is_vision_request implementation defined later in this module.
    return False
@llm.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """OpenAI-compatible chat completions endpoint backed by Ollama.

    Archives the raw request body to REQUESTS_DIR, then dispatches to a
    vision stream, a text stream, or a one-shot completion depending on the
    payload.

    Raises:
        HTTPException: 400 when the body has no 'messages' array.
    """
    body = await request.json()
    # Archive every incoming request body for debugging/auditing.
    timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S%f")
    filename = REQUESTS_DIR / f"request_{timestamp}.json"
    async with aiofiles.open(filename, mode='w') as file:
        await file.write(json.dumps(body, indent=4))
    messages = body.get('messages')
    if not messages:
        raise HTTPException(status_code=400, detail="Message data is required in the request body.")
    requested_model = body.get('model', 'default-model')
    DEBUG(f"Requested model: {requested_model}")
    stream = body.get('stream')
    # Accept both OpenAI's max_tokens and Ollama's num_predict spellings.
    token_limit = body.get('max_tokens') or body.get('num_predict')
    # Check if the most recent message contains an image_url
    recent_message = messages[-1]
    if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
        DEBUG("Processing as a vision request")
        # Vision requests are always routed to llava, ignoring the requested model.
        model = "llava"
        DEBUG(f"Using model: {model}")
        return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json")
    else:
        DEBUG("Processing as a standard request")
        model = requested_model
        DEBUG(f"Using model: {model}")
        if stream:
            return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json")
        else:
            response_data = await generate_messages(messages, model)
            return JSONResponse(response_data, media_type="application/json")
async def stream_messages(messages: list, model: str = "llama3", num_predict: int = 300):
    """Stream an Ollama chat completion as OpenAI-style SSE "data:" chunks,
    always terminating with "data: [DONE]". Errors are emitted as a chunk
    with an "error" key rather than raised."""
    # NOTE(review): assumes the Ollama AsyncClient supports `async with` --
    # confirm against the installed ollama package version.
    async with Ollama() as async_client:
        try:
            index = 0
            async for part in async_client.chat(model=model, messages=messages, stream=True, options={'num_predict': num_predict}):
                # Static id/fingerprint values mimic the OpenAI wire format.
                yield "data: " + json.dumps({
                    "id": "chatcmpl-123",
                    "object": "chat.completion.chunk",
                    "created": int(time.time()),
                    "model": model,
                    "system_fingerprint": "fp_44709d6fcb",
                    "choices": [{
                        "index": index,
                        "delta": {"role": "assistant", "content": part['message']['content']},
                        "logprobs": None,
                        "finish_reason": None if 'finish_reason' not in part else part['finish_reason']
                    }]
                }) + "\n\n"
                index += 1
        except Exception as e:
            yield "data: " + json.dumps({"error": f"Error: {str(e)}"}) + "\n\n"
        yield "data: [DONE]\n\n"
async def stream_messages_with_vision(message: dict, model: str, num_predict: int = 300):
    """Stream a vision-model response as OpenAI-style SSE chunks for every
    base64 data-URL image found in the message's content parts, terminating
    with "data: [DONE]". Errors are emitted as an "error" chunk."""
    # NOTE(review): assumes the Ollama AsyncClient supports `async with` --
    # confirm against the installed ollama package version.
    async with Ollama() as async_client:
        try:
            if isinstance(message.get('content'), list):
                content = message['content']
                for part in content:
                    if part['type'] == 'image_url' and 'url' in part['image_url']:
                        image_url = part['image_url']['url']
                        if image_url.startswith('data:image'):
                            # Convert base64 to bytes
                            image_data = base64.b64decode(image_url.split('base64,')[1])
                            response_generator = await async_client.generate(
                                model=model,
                                prompt='explain this image:',
                                images=[image_data],
                                stream=True,
                                options={'num_predict': num_predict}
                            )
                            index = 0
                            async for response in response_generator:
                                # Static id/fingerprint values mimic the OpenAI wire format.
                                yield "data: " + json.dumps({
                                    "id": "chatcmpl-123",
                                    "object": "chat.completion.chunk",
                                    "created": int(time.time()),
                                    "model": model,
                                    "system_fingerprint": "fp_44709d6fcb",
                                    "choices": [{
                                        "index": index,
                                        "delta": {"role": "assistant", "content": response['response']},
                                        "logprobs": None,
                                        "finish_reason": None if 'finish_reason' not in response else response['finish_reason']
                                    }]
                                }) + "\n\n"
                                index += 1
        except Exception as e:
            yield "data: " + json.dumps({"error": f"Error: {str(e)}"}) + "\n\n"
        yield "data: [DONE]\n\n"
def get_appropriate_model(requested_model):
    """Map the requested model name onto something servable locally: the
    vision default for gpt-4-vision-preview, the LLM default when the name is
    not installed, otherwise the name unchanged."""
    if requested_model == "gpt-4-vision-preview":
        return DEFAULT_VISION
    return requested_model if is_model_available(requested_model) else DEFAULT_LLM
def is_vision_request(content):
    """Return True when `content` is a list of message parts containing at
    least one dict of type 'image_url'."""
    if not isinstance(content, list):
        return False
    for part in content:
        if isinstance(part, dict) and part.get('type') == 'image_url':
            return True
    return False
@llm.get("/v1/models")
async def get_models():
    """List locally installed Ollama models in OpenAI /v1/models response format."""
    model_data = OllamaList()
    formatted_models = [
        {
            # Strip the ":tag" suffix so ids match OpenAI-style short names.
            "id": entry['name'].split(':')[0],
            "object": "model",
            "created": convert_to_unix_time(entry['modified_at']),
            "owned_by": "sij"
        }
        for entry in model_data['models']
    ]
    return JSONResponse({
        "object": "list",
        "data": formatted_models
    })
async def generate_messages(messages: list, model: str = "llama3"):
    """Run a non-streaming chat completion against Ollama and wrap the reply
    in an OpenAI-style choices payload; failures yield an {'error': ...} dict."""
    async_client = Ollama()
    try:
        response = await async_client.chat(model=model, messages=messages, stream=False)
        reply = {
            "role": "assistant",
            "content": response['message']['content']
        }
        return {"model": model, "choices": [{"message": reply}]}
    except Exception as e:
        return {"error": f"Error: {str(e)}"}
def is_model_available(model_name):
    """Return True when model_name matches an installed Ollama model, either
    exactly or as the name before the ':' tag. Ambiguous matches are logged
    as errors but still count as available."""
    model_data = OllamaList()
    available_models = [entry['name'] for entry in model_data['models']]
    DEBUG(f"Available models: {available_models}")  # Log using the configured LOGGER
    matches = [
        candidate for candidate in available_models
        if candidate == model_name or candidate.startswith(model_name + ':')
    ]
    if not matches:
        ERR(f"No match found for model: {model_name}")
        return False
    if len(matches) > 1:
        ERR(f"Ambiguous match found, models: {matches}")
        return True
    DEBUG(f"Unique match found: {matches[0]}")
    return True
@llm.options("/chat/completions")
@llm.options("/v1/chat/completions")
async def chat_completions_options(request: Request):
    """OPTIONS handler returning a static, OpenAI-shaped usage hint for the
    chat completions endpoints."""
    return JSONResponse(
        content={
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": "To use the chat completions endpoint, make a POST request to /v1/chat/completions with a JSON payload containing the 'messages' array. Each message should have a 'role' (either 'system', 'user', or 'assistant') and 'content' (the message text). You can optionally specify the 'model' to use. The response will be a JSON object containing the generated completions."
                    },
                    "finish_reason": "stop"
                }
            ],
            "created": int(time.time()),
            "id": str(uuid.uuid4()),
            "model": DEFAULT_LLM,
            "object": "chat.completion.chunk",
        },
        status_code=200,
        headers={
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Allow": "OPTIONS, POST",
        },
    )
#### EMBEDDINGS
class EmbeddingRequest(BaseModel):
    """Embedding request accepting either OpenAI-style 'input' or
    Ollama-style 'prompt'; the validator normalizes both into 'prompt'."""
    model: str
    input: Union[str, List[str], None] = None
    prompt: Union[str, List[str], None] = None
    # NOTE(review): root_validator is the pydantic v1 API (deprecated in v2) --
    # confirm which pydantic version the project pins.
    @root_validator(pre=True)
    def ensure_list(cls, values):
        """Coerce string fields to single-element lists and mirror 'input'
        into 'prompt' when only 'input' was supplied."""
        input_value = values.get('input')
        prompt_value = values.get('prompt')
        if input_value and isinstance(input_value, str):
            values['input'] = [input_value]
        if prompt_value and isinstance(prompt_value, str):
            values['prompt'] = [prompt_value]
        if input_value and not prompt_value:
            values['prompt'] = values['input']
            values['input'] = None
        return values
class EmbeddingResponse(BaseModel):
    """OpenAI-compatible embedding response envelope."""
    object: str  # container type label, e.g. "list"
    data: List[Dict[str, Any]]  # embedding objects (object/index/embedding)
    model: str  # model that produced the embeddings
    usage: Dict[str, int]  # token accounting
@llm.post("/api/embeddings", response_model=EmbeddingResponse)
@llm.post("/v1/embeddings", response_model=EmbeddingResponse)
async def create_embedding(request: EmbeddingRequest):
    """Generate an embedding for the request's prompt text via Ollama and
    return it in OpenAI-compatible format.

    Raises:
        HTTPException: 400 when neither 'input' nor 'prompt' was supplied,
            422 on validation errors, 500 on any other failure.
    """
    # Bug fix: a request with neither 'input' nor 'prompt' leaves
    # request.prompt as None, and " ".join(None) raised a TypeError that
    # surfaced as an opaque 500. Guard BEFORE the try block so the generic
    # Exception handler below cannot convert this 400 into a 500.
    if not request.prompt:
        raise HTTPException(status_code=400, detail="Either 'input' or 'prompt' must be provided.")
    try:
        combined_input = " ".join(request.prompt)
        response = ollama.embeddings(model=request.model, prompt=combined_input)
        embedding_list = response.get("embedding", [])
        data = [{
            "object": "embedding",
            "index": 0,
            "embedding": embedding_list
        }]
        result = {
            "object": "list",
            "data": data,
            "model": request.model,
            "usage": {"prompt_tokens": 5, "total_tokens": 5}  # Example token counts
        }
        return result
    except ValidationError as e:
        raise HTTPException(status_code=422, detail=e.errors())
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@llm.options("/api/embeddings")
@llm.options("/v1/embeddings")
async def options_embedding():
    """Preflight/OPTIONS handler for the embeddings endpoints (CORS headers)."""
    return JSONResponse(
        content={},
        headers={
            "Allow": "OPTIONS, POST",
            "Content-Type": "application/json",
            "Access-Control-Allow-Methods": "OPTIONS, POST",
            "Access-Control-Allow-Headers": "Content-Type"
        }
    )
###### PORTED FROM IGBOT, NEEDS TO BE UPDATED FOR THIS ENVIRONMENT AND MADE ASYNC: #####
def query_gpt4(llmPrompt: List = None, system_msg: str = "", user_msg: str = "", max_tokens: int = 150):
    """Query GPT-4 and return the text of the first completion choice.

    Args:
        llmPrompt: Optional full message list; overrides system_msg/user_msg.
            Defaults to None (not a mutable []) to avoid the shared
            mutable-default pitfall; behavior for callers is unchanged.
        system_msg: System prompt used when llmPrompt is not given.
        user_msg: User prompt used when llmPrompt is not given.
        max_tokens: Completion length limit.

    Returns:
        The completion text, or "" if the API returned no choices.
    """
    messages = llmPrompt if llmPrompt else [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_msg}
    ]
    LLM = OpenAI(api_key=OPENAI_API_KEY)
    response = LLM.chat.completions.create(
        model="gpt-4",
        messages=messages,
        max_tokens=max_tokens
    )
    if hasattr(response, "choices") and response.choices:  # 'choices' exists and is non-empty
        first_choice = response.choices[0]
        if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
            return first_choice.message.content
        DEBUG("No content attribute in the first choice's message")
        DEBUG(f"No content found in message string: {response.choices}")
        DEBUG("Trying again!")
        # Retry with the same messages. The previous call passed max_tokens
        # positionally into system_msg and discarded the return value.
        return query_gpt4(llmPrompt=messages, max_tokens=max_tokens)
    else:
        DEBUG(f"No content found in message string: {response}")
        return ""
def llava(image_base64, prompt):
    """Caption an image with the local llava model; return '' if it declines."""
    client = Ollama(host='http://localhost:11434')
    result = client.generate(
        model='llava',
        prompt=f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
        images=[image_base64]
    )
    DEBUG(result)
    reply = result["response"]
    # The model signals "nothing to say" by including the word "pass".
    return "" if "pass" in reply.lower() else reply
def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
    """Caption an image with GPT-4 Vision, then refine the caption.

    Two chat completions: the first drafts a caption, the second strips it
    down to directly-postable text. Returns "" when the model answers PASS.
    On a malformed response, waits 15s and retries recursively (no retry
    cap — unchanged from the original behavior).
    """
    import time  # local import: only needed for the retry backoff
    VISION_LLM = OpenAI(api_key=OPENAI_API_KEY)
    response_1 = VISION_LLM.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "system",
                "content": f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt_sys}",
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": f"{prompt_usr}"},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}}
                ],
            }
        ],
        max_tokens=max_tokens,
        stream=False
    )
    if response_1 and response_1.choices:
        if len(response_1.choices) > 0:
            first_choice = response_1.choices[0]
            if first_choice.message and first_choice.message.content:
                comment_content = first_choice.message.content
                if "PASS" in comment_content:
                    return ""
                DEBUG(f"Generated comment: {comment_content}")
                # Second round: feed the draft back and ask for a clean caption.
                response_2 = VISION_LLM.chat.completions.create(
                    model="gpt-4-vision-preview",
                    messages=[
                        {
                            "role": "system",
                            "content": f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt_sys}",
                        },
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": f"{prompt_usr}"},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:image/jpg;base64,{image_base64}"
                                    },
                                },
                            ],
                        },
                        {
                            "role": "assistant",
                            "content": comment_content
                        },
                        {
                            "role": "user",
                            "content": "Please refine it, and remember to ONLY include the caption or comment, nothing else! That means no preface, no postscript, no notes, no reflections, and not even any acknowledgment of this follow-up message. I need to be able to use your output directly on social media. Do include emojis though."
                        }
                    ],
                    max_tokens=max_tokens,
                    stream=False
                )
                if response_2 and response_2.choices:
                    if len(response_2.choices) > 0:
                        first_choice = response_2.choices[0]
                        if first_choice.message and first_choice.message.content:
                            final_content = first_choice.message.content
                            DEBUG(f"Generated comment: {final_content}")
                            if "PASS" in final_content:
                                return ""
                            else:
                                return final_content
    DEBUG("Vision response did not contain expected data.")
    DEBUG(f"Vision response: {response_1}")
    # asyncio.sleep() in a sync function returns an un-awaited coroutine and
    # never actually sleeps; use a real blocking sleep before retrying.
    time.sleep(15)
    try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
    return try_again

1081
sijapi/routers/note.py Normal file

File diff suppressed because it is too large Load diff

16
sijapi/routers/rag.py Normal file
View file

@ -0,0 +1,16 @@
'''
IN DEVELOPMENT: Retrieval-Augmented Generation module.
NOTES: Haven't yet decided if this should depend on the Obsidian and Chat modules, or if they should depend on it, or one of each.
'''
from fastapi import APIRouter
rag = APIRouter()
@rag.get("/rag/search")
async def rag_search_endpoint(query: str, scope: str):
    """Search the RAG index (not yet implemented).

    The original line was a bare ``rag.get(...)`` expression — without the
    ``@`` it never decorated the function, so the route was not registered.
    """
    pass
@rag.post("/rag/embed")
async def rag_upload_endpoint(path: str):
    """Embed a document into the RAG index (not yet implemented).

    The original line was a bare ``rag.post(...)`` expression — without the
    ``@`` it never decorated the function, so the route was not registered.
    """
    pass

73
sijapi/routers/serve.py Normal file
View file

@ -0,0 +1,73 @@
'''
Web server module. Used by other modules when serving static content is required, e.g. the sd image generation module. Also used to serve PUBLIC_KEY.
'''
import os
from fastapi import APIRouter, Form, HTTPException, Request, Response
from fastapi.responses import FileResponse, PlainTextResponse
from pathlib import Path
from datetime import datetime
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pathlib import Path
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
from sijapi import DATA_DIR, SD_IMAGE_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
serve = APIRouter(tags=["public"])
@serve.get("/pgp")
async def get_pgp():
    """Serve the configured PGP public key as plain text."""
    return Response(PUBLIC_KEY, media_type="text/plain")
@serve.get("/img/{image_name}")
def serve_image(image_name: str):
    """Serve a generated SD image by filename, or an error payload if absent."""
    candidate = os.path.join(SD_IMAGE_DIR, image_name)
    if not os.path.exists(candidate):
        return {"error": "Image not found"}
    return FileResponse(candidate)
def construct_journal_path(date_str: str) -> Path:
    """Build the Obsidian daily-journal note path for a YYYY-MM-DD string.

    Raises:
        HTTPException: 400 when date_str is not a valid YYYY-MM-DD date.
    """
    try:
        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
        # Layout: journal/<year>/<year-month Month>/<date Weekday>/<date Weekday>.md
        journal_path = OBSIDIAN_VAULT_DIR / f'journal/{date_obj:%Y}/{date_obj:%Y-%m %B}/{date_obj:%Y-%m-%d %A}/{date_obj:%Y-%m-%d %A}.md'
        return journal_path
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid date format")
def is_valid_date(date_str: str) -> bool:
    """Return True iff date_str parses as a YYYY-MM-DD calendar date."""
    try:
        datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        return False
    return True
@serve.get("/notes/{file_path:path}")
async def get_file(file_path: str):
    """Serve a note or attachment from the Obsidian vault.

    A bare YYYY-MM-DD path resolves to that day's journal note; anything
    else is treated as a vault-relative path (a missing suffix defaults to
    .md). Markdown is returned as plain text, images as files.
    """
    if is_valid_date(file_path):
        # NOTE(review): file_path is a str here, but assemble_journal_path
        # calls .strftime on its first argument — this branch likely needs a
        # datetime.strptime first; confirm against sijapi.utilities.
        absolute_path, local_path = assemble_journal_path(file_path, no_timestamp = True)
    else:
        absolute_path = OBSIDIAN_VAULT_DIR / file_path
        if not absolute_path.suffix:
            absolute_path = absolute_path.with_suffix(".md")
    if not absolute_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")
    if absolute_path.suffix == '.md':
        try:
            with open(absolute_path, 'r', encoding='utf-8') as file:
                content = file.read()
                return PlainTextResponse(content)
        except Exception as e:
            raise HTTPException(status_code=500, detail="Internal Server Error")
    elif absolute_path.suffix in ['.png', '.jpg', '.jpeg']:
        return FileResponse(absolute_path)
    else:
        raise HTTPException(status_code=400, detail="Unsupported file type")

406
sijapi/routers/tts.py Normal file
View file

@ -0,0 +1,406 @@
from fastapi import APIRouter, UploadFile, HTTPException, Response, Form, File, BackgroundTasks, Depends, Request
from fastapi.responses import Response, StreamingResponse, FileResponse
from fastapi.responses import StreamingResponse, PlainTextResponse
import requests
import json
import shutil
from io import BytesIO
import asyncio
from pydantic import BaseModel
from typing import Optional, Union, List
from pydub import AudioSegment
from TTS.api import TTS
from pathlib import Path
from datetime import datetime
from time import time
import torch
import traceback
import hashlib
import uuid
import httpx
import tempfile
import random
import re
import os
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import HOME_DIR, DATA_DIR, DEFAULT_VOICE, TTS_DIR, TTS_SEGMENTS_DIR, VOICE_DIR, PODCAST_DIR, TTS_OUTPUT_DIR, ELEVENLABS_API_KEY
from sijapi.utilities import sanitize_filename
### INITIALIZATIONS ###
tts = APIRouter(tags=["trusted", "private"])
# XTTS is pinned to CPU here; change manually if a GPU is available.
DEVICE = torch.device('cpu')
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
@tts.get("/tts/local_voices", response_model=List[str])
async def list_wav_files():
    """List locally available XTTS voice names (stems of VOICE_DIR/*.wav).

    Uses os.path.splitext rather than split('.') so voice files whose names
    contain dots (e.g. 'my.voice.wav') keep their full stem.
    """
    wav_files = [os.path.splitext(file)[0] for file in os.listdir(VOICE_DIR) if file.endswith(".wav")]
    return wav_files
@tts.get("/tts/elevenlabs_voices")
async def list_11l_voices():
    """List ElevenLabs voices as 'Name: `id`' lines; empty text on failure."""
    formatted_list = ""
    url = "https://api.elevenlabs.io/v1/voices"
    headers = {"xi-api-key": ELEVENLABS_API_KEY}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(url, headers=headers)
            DEBUG(f"Response: {response}")
            if response.status_code == 200:
                voices_data = response.json().get("voices", [])
                formatted_list = ""
                for voice in voices_data:
                    name = voice["name"]
                    id = voice["voice_id"]
                    formatted_list += f"{name}: `{id}`\n"
        except Exception as e:
            # Errors are logged, not raised: callers still get a 200.
            ERR(f"Error determining voice ID: {str(e)}")
    return PlainTextResponse(formatted_list, status_code=200)
def select_voice(voice_name: str) -> str:
    """Resolve a local XTTS voice name to its .wav path in VOICE_DIR.

    Raises:
        HTTPException: 404 when the voice file does not exist.
    """
    try:
        voice_file = VOICE_DIR / f"{voice_name}.wav"
        DEBUG(f"select_voice received query to use voice: {voice_name}. Looking for {voice_file} inside {VOICE_DIR}.")
        if voice_file.is_file():
            return str(voice_file)
        else:
            raise HTTPException(status_code=404, detail="Voice file not found")
    except Exception as e:
        # The 404 raised above is also caught here and re-raised as a
        # fresh 404, after logging the original traceback.
        ERR(f"Voice file not found: {str(e)}")
        ERR(traceback.format_exc())
        raise HTTPException(status_code=404, detail="Voice file not found")
@tts.post("/tts/speak")
@tts.post("/v1/audio/speech")
async def generate_speech_endpoint(
    request: Request,
    background_tasks: BackgroundTasks,
    model: str = Form("eleven_turbo_v2"),
    text: Optional[str] = Form(None),
    file: Optional[UploadFile] = File(None),
    voice: Optional[str] = Form(None),
    voice_file: Optional[UploadFile] = File(None),
    speed: Optional[float] = Form(1.1),
    podcast: Union[bool, str] = Form(False),
    stream: bool = Form(True)
):
    """TTS entry point: stream audio back, or synthesize to a file.

    Text comes from 'text' or an uploaded 'file'. With stream=True,
    ElevenLabs voices stream MP3 and local XTTS voices stream WAV;
    otherwise synthesis is delegated to generate_speech().
    """
    try:
        # HTML forms submit booleans as strings; normalize to bool.
        podcast = podcast if isinstance(podcast, bool) else podcast.lower() == 'true'
        text_content = await get_text_content(text, file)
        if stream:
            model = model if model else await get_model(voice, voice_file)
            if model == "eleven_turbo_v2":
                voice_id = await determine_voice_id(voice)
                audio_stream = await get_audio_stream(model, text_content, voice_id)
                return StreamingResponse(audio_stream, media_type="audio/mpeg")
            else:
                return await stream_tts(text_content, speed, voice, voice_file)
        else:
            return await generate_speech(background_tasks, text_content, voice, voice_file, model, speed, podcast)
    except Exception as e:
        ERR(f"Error in TTS: {str(e)}")
        ERR(traceback.format_exc())
        # NOTE(review): 666 is not a registered HTTP status code —
        # presumably intentional; consider 500.
        raise HTTPException(status_code=666, detail="error in TTS")
async def generate_speech(
    background_tasks: BackgroundTasks,
    text: str,
    voice: str = None,
    voice_file: UploadFile = None,
    model: str = None,
    speed: float = 1.1,
    podcast: bool = False,
    title: str = None,
    output_dir = None
) -> str:
    """Synthesize `text` to an audio file and return its path as a string.

    Routes to ElevenLabs for 'eleven_turbo_v2' or local XTTS for 'xtts'.
    XTTS output is scheduled for deletion after the response is sent.

    Raises:
        HTTPException: 400 for an unknown model; other errors re-raised.
    """
    output_dir = Path(output_dir) if output_dir else TTS_OUTPUT_DIR
    if not output_dir.exists():
        output_dir.mkdir(parents=True)
    try:
        model = model if model else await get_model(voice, voice_file)
        if model == "eleven_turbo_v2":
            INFO(f"Using ElevenLabs.")
            audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir)
            return str(audio_file_path)
        elif model == "xtts":
            INFO(f"Using XTTS2")
            final_output_dir = await local_tts(text, speed, voice, voice_file, podcast, background_tasks, title, output_dir)
            # Clean up the generated file once the response has been sent.
            background_tasks.add_task(os.remove, str(final_output_dir))
            return str(final_output_dir)
        else:
            raise HTTPException(status_code=400, detail="Invalid model specified")
    except HTTPException as e:
        ERR(f"HTTP error: {e}")
        ERR(traceback.format_exc())
        raise e
    except Exception as e:
        ERR(f"Error: {e}")
        ERR(traceback.format_exc())
        raise e
async def get_model(voice: str = None, voice_file: UploadFile = None):
    """Pick the TTS backend: 'xtts' for a local voice, else 'eleven_turbo_v2'.

    select_voice() raises an HTTPException (404) when the named .wav is not
    in VOICE_DIR; the original let that propagate, so an ElevenLabs-only
    voice name could never reach the ElevenLabs check. Treat "not found
    locally" as a cue to try ElevenLabs instead.

    Raises:
        HTTPException: 400 when neither a voice nor a voice file was given.
    """
    if voice_file:
        return "xtts"
    if voice:
        try:
            if select_voice(voice):
                return "xtts"
        except HTTPException:
            pass  # not a local voice; fall through to the ElevenLabs lookup
        if await determine_voice_id(voice):
            return "eleven_turbo_v2"
    raise HTTPException(status_code=400, detail="No model or voice specified")
async def determine_voice_id(voice_name: str) -> str:
    """Map a friendly voice name to an ElevenLabs voice ID.

    Checks the hardcoded map first, then queries the ElevenLabs API by ID
    or display name; falls back to the default voice ID on any failure.
    """
    hardcoded_voices = {
        "alloy": "E3A1KVbKoWSIKSZwSUsW",
        "echo": "b42GBisbu9r5m5n6pHF7",
        "fable": "KAX2Y6tTs0oDWq7zZXW7",
        "onyx": "clQb8NxY08xZ6mX6wCPE",
        "nova": "6TayTBKLMOsghG7jYuMX",
        "shimmer": "E7soeOyjpmuZFurvoxZ2",
        DEFAULT_VOICE: "6TayTBKLMOsghG7jYuMX",
        "Sangye": "E7soeOyjpmuZFurvoxZ2",
        "Herzog": "KAX2Y6tTs0oDWq7zZXW7",
        "Attenborough": "b42GBisbu9r5m5n6pHF7"
    }
    if voice_name in hardcoded_voices:
        voice_id = hardcoded_voices[voice_name]
        DEBUG(f"Found voice ID - {voice_id}")
        return voice_id
    DEBUG(f"Requested voice not among the hardcoded options.. checking with 11L next.")
    url = "https://api.elevenlabs.io/v1/voices"
    headers = {"xi-api-key": ELEVENLABS_API_KEY}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(url, headers=headers)
            DEBUG(f"Response: {response}")
            if response.status_code == 200:
                voices_data = response.json().get("voices", [])
                for voice in voices_data:
                    if voice_name == voice["voice_id"] or voice_name == voice["name"]:
                        return voice["voice_id"]
        except Exception as e:
            ERR(f"Error determining voice ID: {str(e)}")
    # Last resort: the default ElevenLabs voice ID.
    return "6TayTBKLMOsghG7jYuMX"
async def elevenlabs_tts(model: str, input_text: str, voice: str, title: str = None, output_dir: str = None):
    """Synthesize `input_text` via the ElevenLabs API and save it as an MP3.

    Returns:
        Path to the saved MP3 (named from `title`, or a timestamp).

    Raises:
        HTTPException: mirrors the ElevenLabs status code on failure.
    """
    voice_id = await determine_voice_id(voice)
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    payload = {
        "text": input_text,
        "model_id": model
    }
    headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
    async with httpx.AsyncClient() as client:
        response = await client.post(url, json=payload, headers=headers)
        output_dir = output_dir if output_dir else TTS_OUTPUT_DIR
        title = title if title else datetime.now().strftime("%Y%m%d%H%M%S")
        filename = f"{sanitize_filename(title)}.mp3"
        file_path = Path(output_dir) / filename
        if response.status_code == 200:
            with open(file_path, "wb") as audio_file:
                audio_file.write(response.content)
            return file_path
        else:
            raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> str:
    """Return TTS input text from an uploaded file (preferred) or the form field."""
    if file:
        raw = await file.read()
        return raw.decode("utf-8").strip()
    if text:
        return text.strip()
    raise HTTPException(status_code=400, detail="No text provided")
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
    """Resolve the speaker .wav to use for XTTS synthesis.

    Priority: named local voice > uploaded voice file (deduplicated into
    VOICE_DIR by MD5 checksum, with a numeric suffix on name collisions) >
    the default voice.
    """
    if voice:
        return select_voice(voice)
    elif voice_file and isinstance(voice_file, UploadFile):
        VOICE_DIR.mkdir(exist_ok=True)
        content = await voice_file.read()
        checksum = hashlib.md5(content).hexdigest()
        existing_file = VOICE_DIR / voice_file.filename
        if existing_file.is_file():
            with open(existing_file, 'rb') as f:
                existing_checksum = hashlib.md5(f.read()).hexdigest()
                if checksum == existing_checksum:
                    # Identical upload already stored; reuse it.
                    return str(existing_file)
        base_name = existing_file.stem
        counter = 1
        new_file = existing_file
        while new_file.is_file():
            # Same name, different content: find a free numbered variant.
            new_file = VOICE_DIR / f"{base_name}{counter:02}.wav"
            counter += 1
        with open(new_file, 'wb') as f:
            f.write(content)
        return str(new_file)
    else:
        DEBUG(f"{datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}")
        return select_voice(DEFAULT_VOICE)
async def local_tts(text_content: str, speed: float, voice: str, voice_file = None, podcast: bool = False, background_tasks: BackgroundTasks = None, title: str = None, output_path: Optional[Path] = None) -> str:
    """Synthesize text with local XTTS, segment by segment, into one WAV.

    Splits the text into sentence-sized chunks, renders each in a worker
    thread, concatenates them, and optionally exports a copy to
    PODCAST_DIR. Returns the output file path as a string.
    """
    if output_path:
        file_path = Path(output_path)
    else:
        datetime_str = datetime.now().strftime("%Y%m%d%H%M%S")
        title = sanitize_filename(title) if title else "Audio"
        filename = f"{datetime_str}_{title}.wav"
        file_path = TTS_OUTPUT_DIR / filename
    # Ensure the parent directory exists
    file_path.parent.mkdir(parents=True, exist_ok=True)
    voice_file_path = await get_voice_file_path(voice, voice_file)
    XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
    segments = split_text(text_content)
    combined_audio = AudioSegment.silent(duration=0)
    for i, segment in enumerate(segments):
        segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
        DEBUG(f"Segment file path: {segment_file_path}")
        # tts_to_file is blocking; run it off the event loop.
        segment_file = await asyncio.to_thread(XTTS.tts_to_file, text=segment, speed=speed, file_path=str(segment_file_path), speaker_wav=[voice_file_path], language="en")
        DEBUG(f"Segment file generated: {segment_file}")
        combined_audio += AudioSegment.from_wav(str(segment_file))
        # Delete the segment file immediately after adding it to the combined audio
        segment_file_path.unlink()
    if podcast:
        podcast_file_path = PODCAST_DIR / file_path.name
        combined_audio.export(podcast_file_path, format="wav")
    combined_audio.export(file_path, format="wav")
    return str(file_path)
async def stream_tts(text_content: str, speed: float, voice: str, voice_file) -> StreamingResponse:
    """Stream local XTTS audio segment-by-segment as a WAV byte stream."""
    voice_file_path = await get_voice_file_path(voice, voice_file)
    segments = split_text(text_content)
    async def audio_stream_generator():
        # Render each segment on demand, stream its bytes, then delete it.
        for segment in segments:
            segment_file = await generate_tts(segment, speed, voice_file_path)
            with open(segment_file, 'rb') as f:
                while chunk := f.read(1024):
                    yield chunk
            os.remove(segment_file)
    return StreamingResponse(audio_stream_generator(), media_type='audio/wav')
async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
    """Synthesize one text segment to a temporary .wav and return its path.

    tempfile.mktemp is deprecated and racy (the returned name can be
    claimed by another process before use); NamedTemporaryFile(delete=False)
    atomically creates the file and hands us its name. The caller is
    responsible for deleting the file when done.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", dir=tempfile.gettempdir(), delete=False) as tmp:
        output_path = tmp.name
    XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
    XTTS.tts_to_file(text=text, speed=speed, file_path=output_path, speaker_wav=[voice_file_path], language="en")
    return output_path
async def get_audio_stream(model: str, input_text: str, voice: str):
    """Fetch ElevenLabs TTS audio and return a 1 KiB chunk iterator.

    The original used blocking requests.post inside this coroutine, which
    stalls the event loop for the duration of the API call; use
    httpx.AsyncClient as the rest of this module does.

    Raises:
        HTTPException: mirrors the ElevenLabs status code on failure.
    """
    voice_id = await determine_voice_id(voice)
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    payload = {
        "text": input_text,
        "model_id": "eleven_turbo_v2"
    }
    headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
    async with httpx.AsyncClient() as client:
        response = await client.post(url, json=payload, headers=headers)
    if response.status_code == 200:
        # The body is fully read; iterate it in 1024-byte chunks.
        return response.iter_bytes(1024)
    else:
        raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
def split_text(text, target_length=35, max_length=50):
    """Split text into sentence-aligned segments of at most ~max_length words.

    Sentences are accumulated until adding the next one would exceed
    max_length words, at which point the current segment is flushed.

    Args:
        text: Raw text; normalized via clean_text_for_tts first.
        target_length: Nominal segment size in words (kept for API
            compatibility; packing is driven by max_length).
        max_length: Per-segment word cap (a single oversized sentence still
            becomes its own segment).

    Returns:
        List of non-empty segment strings.
    """
    text = clean_text_for_tts(text)
    sentences = re.split(r'(?<=[.!?"])\s+', text)
    segments = []
    current_words = []
    for sentence in sentences:
        sentence_words = sentence.split()
        # Flush before overflow — but only if there is something to flush;
        # the original appended an empty segment when the very first
        # sentence already exceeded max_length.
        if current_words and len(current_words) + len(sentence_words) > max_length:
            segments.append(' '.join(current_words))
            DEBUG(f"split_text - segment: {' '.join(current_words)}, word count: {len(current_words)}")
            current_words = sentence_words
        else:
            current_words.extend(sentence_words)
    if current_words:
        segments.append(' '.join(current_words))
        DEBUG(f"split_text - segment: {' '.join(current_words)}, word count: {len(current_words)}")
    return segments
def clean_text_for_tts(text: str) -> str:
    """Normalize text for TTS: flatten line breaks, drop unusual symbols, collapse whitespace."""
    if text is None:
        DEBUG(f"No text received.")
        return None
    flattened = text.replace("\n", " ").replace("\r", " ")
    filtered = re.sub(r"[^\w\s.,;:!?'\"]", '', flattened)
    return re.sub(r'\s+', ' ', filtered).strip()
def copy_to_podcast_dir(file_path):
    """Copy an audio file into PODCAST_DIR, printing (not raising) errors."""
    try:
        # Extract the file name from the file path
        file_name = Path(file_path).name
        # Construct the destination path in the PODCAST_DIR
        destination_path = PODCAST_DIR / file_name
        # Copy the file to the PODCAST_DIR
        shutil.copy(file_path, destination_path)
        print(f"File copied successfully to {destination_path}")
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except shutil.SameFileError:
        print(f"Source and destination are the same file: {file_path}")
    except PermissionError:
        print(f"Permission denied while copying the file: {file_path}")
    except Exception as e:
        print(f"An error occurred while copying the file: {file_path}")
        print(f"Error details: {str(e)}")

265
sijapi/routers/weather.py Normal file
View file

@ -0,0 +1,265 @@
import asyncio
from fastapi import APIRouter, HTTPException
from fastapi import HTTPException
from asyncpg.cursor import Cursor
from httpx import AsyncClient
from typing import Dict
from datetime import datetime
from shapely.wkb import loads
from binascii import unhexlify
from sijapi.utilities import localize_dt
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import VISUALCROSSING_API_KEY, TZ
from sijapi.utilities import get_db_connection, haversine
from sijapi.routers import locate
weather = APIRouter()
async def get_weather(date_time: datetime, latitude: float, longitude: float):
    """Return weather for a date/location, serving from the DB when fresh.

    Cached data is reused only when: the request is not in the future, the
    cache row was stored after the requested time, the stored point is
    within ~8 km (haversine), and hourly rows exist. Otherwise
    VisualCrossing is queried, the result stored, and the DB re-read.

    Returns:
        Dict with 'DailyWeather' and 'HourlyWeather' keys, or None when
        both the cache lookup and the API fetch failed.
    """
    # request_date_str = date_time.strftime("%Y-%m-%d")
    DEBUG(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}")
    daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
    fetch_new_data = True
    if daily_weather_data:
        try:
            DEBUG(f"Daily weather data from db: {daily_weather_data}")
            last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
            last_updated = localize_dt(last_updated)
            # Location is stored as hex-encoded WKB (PointZ); decode to compare.
            stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
            stored_loc = loads(stored_loc_data)
            stored_lat = stored_loc.y
            stored_lon = stored_loc.x
            stored_ele = stored_loc.z
            hourly_weather = daily_weather_data.get('HourlyWeather')
            DEBUG(f"Hourly: {hourly_weather}")
            DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n")
            request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
            DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
            # Freshness check (any failure above falls through to a re-fetch).
            if last_updated and (date_time <= datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
                DEBUG(f"We can use existing data... :')")
                fetch_new_data = False
        except Exception as e:
            ERR(f"Error in get_weather: {e}")
    if fetch_new_data:
        DEBUG(f"We require new data!")
        request_date_str = date_time.strftime("%Y-%m-%d")
        url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
        try:
            async with AsyncClient() as client:
                response = await client.get(url)
                if response.status_code == 200:
                    DEBUG(f"Successfully obtained data from VC...")
                    try:
                        weather_data = response.json()
                        store_result = await store_weather_to_db(date_time, weather_data)
                        if store_result == "SUCCESS":
                            DEBUG(f"New weather data for {request_date_str} stored in database...")
                        else:
                            ERR(f"Failed to store weather data for {request_date_str} in database! {store_result}")
                        DEBUG(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}")
                        daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
                        if daily_weather_data is not None:
                            return daily_weather_data
                        else:
                            raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
                    except Exception as e:
                        ERR(f"Problem parsing VC response or storing data: {e}")
                        # NOTE(review): this re-raised 500 is swallowed by the
                        # outer `except Exception` below, so it never reaches
                        # the caller — confirm whether that is intended.
                        raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
                else:
                    ERR(f"Failed to fetch weather data: {response.status_code}, {response.text}")
        except Exception as e:
            ERR(f"Exception during API call: {e}")
    return daily_weather_data
async def store_weather_to_db(date_time: datetime, weather_data: dict):
    """Persist one day of VisualCrossing weather data (daily + hourly rows).

    Returns:
        "SUCCESS" on success; None when an error occurred (errors are
        logged, not raised).

    NOTE(review): the asyncpg connection is never closed here — confirm a
    pool or add a finally-close. Also, if the preparation step below fails,
    daily_weather_params is left unbound and the insert dies with a
    NameError (caught and logged by the final except).
    """
    conn = await get_db_connection()
    try:
        day_data = weather_data.get('days')[0]
        DEBUG(f"day_data.get('sunrise'): {day_data.get('sunrise')}")
        # Handle preciptype and stations as PostgreSQL arrays
        preciptype_array = day_data.get('preciptype', []) or []
        stations_array = day_data.get('stations', []) or []
        date_str = date_time.strftime("%Y-%m-%d")
        # Get location details from weather data if available
        longitude = weather_data.get('longitude')
        latitude = weather_data.get('latitude')
        elevation = locate.get_elevation(latitude, longitude) # 152.4 # default until we add a geocoder that can look up actual elevation; weather_data.get('elevation') # assuming 'elevation' key, replace if different
        location_point = f"POINTZ({longitude} {latitude} {elevation})" if longitude and latitude and elevation else None
        # Correct for the datetime objects
        day_data['datetime'] = localize_dt(day_data.get('datetime')) #day_data.get('datetime'))
        # Sunrise/sunset arrive as "HH:MM[:SS]" strings; graft them onto the day's date.
        day_data['sunrise'] = day_data['datetime'].replace(hour=int(day_data.get('sunrise').split(':')[0]), minute=int(day_data.get('sunrise').split(':')[1]))
        day_data['sunset'] = day_data['datetime'].replace(hour=int(day_data.get('sunset').split(':')[0]), minute=int(day_data.get('sunset').split(':')[1]))
        # Parameter order must match the column list in daily_weather_query below.
        daily_weather_params = (
            day_data.get('sunrise'), day_data.get('sunriseEpoch'),
            day_data.get('sunset'), day_data.get('sunsetEpoch'),
            day_data.get('description'), day_data.get('tempmax'),
            day_data.get('tempmin'), day_data.get('uvindex'),
            day_data.get('winddir'), day_data.get('windspeed'),
            day_data.get('icon'), datetime.now(),
            day_data.get('datetime'), day_data.get('datetimeEpoch'),
            day_data.get('temp'), day_data.get('feelslikemax'),
            day_data.get('feelslikemin'), day_data.get('feelslike'),
            day_data.get('dew'), day_data.get('humidity'),
            day_data.get('precip'), day_data.get('precipprob'),
            day_data.get('precipcover'), preciptype_array,
            day_data.get('snow'), day_data.get('snowdepth'),
            day_data.get('windgust'), day_data.get('pressure'),
            day_data.get('cloudcover'), day_data.get('visibility'),
            day_data.get('solarradiation'), day_data.get('solarenergy'),
            day_data.get('severerisk', 0), day_data.get('moonphase'),
            day_data.get('conditions'), stations_array, day_data.get('source'),
            location_point
        )
    except Exception as e:
        ERR(f"Failed to prepare database query in store_weather_to_db! {e}")
    try:
        daily_weather_query = '''
        INSERT INTO DailyWeather (
            sunrise, sunriseEpoch, sunset, sunsetEpoch, description,
            tempmax, tempmin, uvindex, winddir, windspeed, icon, last_updated,
            datetime, datetimeEpoch, temp, feelslikemax, feelslikemin, feelslike,
            dew, humidity, precip, precipprob, precipcover, preciptype,
            snow, snowdepth, windgust, pressure, cloudcover, visibility,
            solarradiation, solarenergy, severerisk, moonphase, conditions,
            stations, source, location
        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38)
        RETURNING id
        '''
        # Debug logs for better insights
        # DEBUG("Executing query: %s", daily_weather_query)
        # DEBUG("With parameters: %s", daily_weather_params)
        # Execute the query to insert daily weather data
        async with conn.transaction():
            daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params)
        if 'hours' in day_data:
            for hour_data in day_data['hours']:
                try:
                    # Throttle per-hour inserts slightly.
                    await asyncio.sleep(0.1)
                    # hour_data['datetime'] = parse_date(hour_data.get('datetime'))
                    hour_timestamp = date_str + ' ' + hour_data['datetime']
                    hour_data['datetime'] = localize_dt(hour_timestamp)
                    DEBUG(f"Processing hours now...")
                    # DEBUG(f"Processing {hour_data['datetime']}")
                    hour_preciptype_array = hour_data.get('preciptype', []) or []
                    hour_stations_array = hour_data.get('stations', []) or []
                    # Parameter order must match the column list in hourly_weather_query.
                    hourly_weather_params = (
                        daily_weather_id,
                        hour_data['datetime'],
                        hour_data.get('datetimeEpoch'),
                        hour_data['temp'],
                        hour_data['feelslike'],
                        hour_data['humidity'],
                        hour_data['dew'],
                        hour_data['precip'],
                        hour_data['precipprob'],
                        hour_preciptype_array,
                        hour_data['snow'],
                        hour_data['snowdepth'],
                        hour_data['windgust'],
                        hour_data['windspeed'],
                        hour_data['winddir'],
                        hour_data['pressure'],
                        hour_data['cloudcover'],
                        hour_data['visibility'],
                        hour_data['solarradiation'],
                        hour_data['solarenergy'],
                        hour_data['uvindex'],
                        hour_data.get('severerisk', 0),
                        hour_data['conditions'],
                        hour_data['icon'],
                        hour_stations_array,
                        hour_data.get('source', ''),
                    )
                    try:
                        hourly_weather_query = '''
                        INSERT INTO HourlyWeather (daily_weather_id, datetime, datetimeEpoch, temp, feelslike, humidity, dew, precip, precipprob,
                        preciptype, snow, snowdepth, windgust, windspeed, winddir, pressure, cloudcover, visibility, solarradiation, solarenergy,
                        uvindex, severerisk, conditions, icon, stations, source)
                        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26)
                        RETURNING id
                        '''
                        # Debug logs for better insights
                        # DEBUG("Executing query: %s", hourly_weather_query)
                        # DEBUG("With parameters: %s", hourly_weather_params)
                        # Execute the query to insert hourly weather data
                        async with conn.transaction():
                            hourly_weather_id = await conn.fetchval(hourly_weather_query, *hourly_weather_params)
                            # ERR(f"\n{hourly_weather_id}")
                    except Exception as e:
                        ERR(f"EXCEPTION: {e}")
                except Exception as e:
                    ERR(f"EXCEPTION: {e}")
        return "SUCCESS"
    except Exception as e:
        ERR(f"Error in dailyweather storage: {e}")
async def get_weather_from_db(date_time: datetime, latitude: float, longitude: float):
    """Fetch the nearest stored DailyWeather row for a date, plus its hours.

    Matches rows for the same calendar date within 8046.72 m (~5 mi) of the
    requested point, nearest first.

    Returns:
        {'DailyWeather': dict, 'HourlyWeather': [dict, ...]}, or None when
        no row matches or an error occurred (errors are logged, not raised).

    NOTE(review): the asyncpg connection is never closed — confirm pooling
    or add a finally-close.
    """
    conn = await get_db_connection()
    query_date = date_time.date()
    try:
        # Query to get daily weather data
        query = '''
            SELECT DW.* FROM DailyWeather DW
            WHERE DW.datetime::date = $1
            AND ST_DWithin(DW.location::geography, ST_MakePoint($2,$3)::geography, 8046.72)
            ORDER BY ST_Distance(DW.location, ST_MakePoint($4, $5)::geography) ASC
            LIMIT 1
        '''
        daily_weather_data = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude)
        if daily_weather_data is None:
            DEBUG(f"No daily weather data retrieved from database.")
            return None
        # else:
        # DEBUG(f"Daily_weather_data: {daily_weather_data}")
        # Query to get hourly weather data
        query = '''
            SELECT HW.* FROM HourlyWeather HW
            WHERE HW.daily_weather_id = $1
        '''
        hourly_weather_data = await conn.fetch(query, daily_weather_data['id'])
        day: Dict = {
            'DailyWeather': dict(daily_weather_data),
            'HourlyWeather': [dict(row) for row in hourly_weather_data],
        }
        # DEBUG(f"day: {day}")
        return day
    except Exception as e:
        ERR(f"Unexpected error occurred: {e}")

427
sijapi/utilities.py Normal file
View file

@ -0,0 +1,427 @@
import re
import os
from fastapi import Form
import re
import io
from io import BytesIO
import base64
import math
from dateutil import parser
from pathlib import Path
import filetype
from PyPDF2 import PdfReader
from pdfminer.high_level import extract_text as pdfminer_extract_text
import pytesseract
from pdf2image import convert_from_path
from datetime import datetime, date, time
from typing import Optional, Union, Tuple
import asyncio
from PIL import Image
from dateutil.parser import parse as dateutil_parse
from docx import Document
import asyncpg
from sshtunnel import SSHTunnelForwarder
from fastapi import Depends, HTTPException, Request, UploadFile
from fastapi.security.api_key import APIKeyHeader
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
from sijapi import DB, GLOBAL_API_KEY, DB, DB_HOST, DB_PORT, DB_USER, DB_PASS, TZ, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR
api_key_header = APIKeyHeader(name="Authorization")
def validate_api_key(request: Request, api_key: str = Depends(api_key_header)):
    """FastAPI dependency: enforce the global API key on protected routes.

    Accepts the key as an Authorization header (with or without a
    'Bearer ' prefix) or as an ?api_key= query parameter. The health/ip/pgp
    endpoints are exempt.

    Raises:
        HTTPException: 401 when neither credential matches GLOBAL_API_KEY.
    """
    if request.url.path not in ["/health", "/ip", "/pgp"]:
        api_key_query = request.query_params.get("api_key")
        # The original tested `api_key_header` — the APIKeyHeader *object*,
        # which is always truthy — instead of the header value itself.
        if api_key:
            api_key = api_key.lower().split("bearer ")[-1]
        if api_key != GLOBAL_API_KEY and api_key_query != GLOBAL_API_KEY:
            raise HTTPException(status_code=401, detail="Invalid or missing API key")
def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str = None, extension: str = None, no_timestamp: bool = False) -> Tuple[Path, Path]:
    '''
    Obsidian helper. Takes a datetime and optional subdirectory name, filename, and extension.
    If an extension is provided, it ensures the path is to a file with that extension.
    If no extension is provided, it treats the path as a directory.
    Returns (absolute_path, relative_path); the parent directory is created on disk.
    '''
    year = date_time.strftime(YEAR_FMT)
    month = date_time.strftime(MONTH_FMT)
    day = date_time.strftime(DAY_FMT)
    day_short = date_time.strftime(DAY_SHORT_FMT)
    timestamp = date_time.strftime("%H%M%S")

    relative_path = Path("journal") / year / month / day

    if not subdir and not filename and not extension:
        # standard daily note handler, where only the date_time was specified:
        relative_path = relative_path / f"{day}.md"
    else:
        if subdir:
            # datestamped subdirectory handler
            relative_path = relative_path / f"{day_short} {subdir}"
        if filename:
            if no_timestamp:
                filename = f"{day_short} {sanitize_filename(filename)}"
            else:
                filename = f"{day_short} {timestamp} {sanitize_filename(filename)}"
        if extension:
            extension = extension if extension.startswith(".") else f".{extension}"
            if not filename:
                # BUG FIX: the original crashed (None.endswith) when an
                # extension was supplied without a filename; fall back to a
                # timestamped default name instead.
                filename = f"{day_short} {timestamp}{extension}"
            elif not filename.endswith(extension):
                # BUG FIX: append the extension instead of discarding the
                # sanitized filename.
                filename = f"{filename}{extension}"
        if filename:
            relative_path = relative_path / filename

    absolute_path = OBSIDIAN_VAULT_DIR / relative_path
    os.makedirs(absolute_path.parent, exist_ok=True)
    return absolute_path, relative_path
def prefix_lines(text: str, prefix: str = '> ') -> str:
    """Prepend *prefix* to every line of *text*, dropping each line's
    leading whitespace (Markdown block-quote style)."""
    return '\n'.join(prefix + line.lstrip() for line in text.split('\n'))
def f(file):
    """Normalize *file* into something readable as binary.

    An already-open file-like object or raw bytes/bytearray is returned
    unchanged; a str or Path is opened in 'rb' mode. The caller owns (and
    must close) any handle returned.

    Raises:
        TypeError: for any other input type.
    """
    if hasattr(file, 'read') and callable(file.read):
        return file
    if isinstance(file, (bytes, bytearray)):
        return file
    if isinstance(file, Path):
        file_path = file
    elif isinstance(file, str):
        file_path = Path(file)
    else:
        raise TypeError("Invalid file type. Expected str, Path, or file-like object.")
    # BUG FIX: the original did `with open(...) as fh: return fh`, which
    # hands back an already-closed handle; return an open one instead.
    return open(file_path, 'rb')
def get_extension(file):
    """Return the extension (including the leading dot, e.g. '.pdf') of a
    str path, Path, or upload object exposing ``.filename``."""
    try:
        if isinstance(file, Path):
            path_obj = file
        elif isinstance(file, str):
            path_obj = Path(file)
        else:
            # e.g. a FastAPI UploadFile with a .filename attribute
            path_obj = Path(file.filename)
        return path_obj.suffix
    except Exception as e:
        ERR(f"Unable to get extension of {file}")
        raise e
def sanitize_filename(text, max_length=255):
    """Strip characters that are unsafe in filenames and cap the length."""
    DEBUG(f"Filename before sanitization: {text}")
    # Keep word characters, whitespace, dots and dashes; drop everything else.
    cleaned = re.sub(r'[^\w\s\.-]', '', text).strip()
    result = cleaned[:max_length]
    DEBUG(f"Filename after sanitization: {result}")
    return result
def bool_convert(value: str = Form(None)):
    """Form-field coercion: parse a flag/checkbox string into a bool.

    BUG FIX: the original called ``value.lower()`` unconditionally and
    raised AttributeError when the field was absent (value None); a
    missing or non-string value now yields False.
    """
    return isinstance(value, str) and value.lower() in ["true", "1", "t", "y", "yes"]
def str_to_bool(value: str) -> bool:
    """
    Convert a string to a boolean.
    Interprets 'true', '1', 'yes', 'y' as True.
    Interprets 'false', '0', 'no', 'n', '', or any other string as False.
    """
    # BUG FIX: the original body was only this docstring, so the function
    # always returned None; implement the documented contract.
    return value.strip().lower() in ("true", "1", "yes", "y")
def get_timestamp():
    """Current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    now = datetime.now()
    return now.strftime("%Y-%m-%d %H:%M:%S")
async def extract_text(file_path: str) -> str:
    """Dispatch text extraction by file extension.

    Handles '.pdf' and '.docx'; any other extension yields None (the
    original contract).
    """
    handlers = {
        '.pdf': extract_text_from_pdf,
        '.docx': extract_text_from_docx,
    }
    for suffix, handler in handlers.items():
        if file_path.endswith(suffix):
            return await handler(file_path)
def clean_text(text):
    """Normalize extracted text: remove hyphens (PDF line-break
    de-hyphenation), collapse whitespace runs to single spaces, strip
    zero-width characters, and trim the ends."""
    without_hyphens = text.replace('-', '')
    collapsed = re.sub(r'\s+', ' ', without_hyphens)
    no_zero_width = re.sub(r'[\u200B-\u200D\uFEFF]', '', collapsed)
    return no_zero_width.strip()
async def ocr_pdf(file_path: str) -> str:
    """OCR every page of a PDF and return the concatenated text.

    Rasterization and Tesseract both run in worker threads so the event
    loop is never blocked; returns '' on any failure.
    """
    try:
        pages = await asyncio.to_thread(convert_from_path, file_path)
        page_texts = await asyncio.gather(
            *(asyncio.to_thread(pytesseract.image_to_string, page) for page in pages)
        )
        return ' '.join(page_texts)
    except Exception as e:
        ERR(f"Error during OCR: {str(e)}")
        return ""
async def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from a PDF, trying PyPDF2, then pdfminer.six, then OCR.

    NOTE(review): this definition is shadowed by a later redefinition of
    the same name in this module; only the later one is reachable at
    runtime. Consider deleting one of the two.

    Returns '' for an invalid PDF.
    """
    if not await is_valid_pdf(file_path):
        ERR(f"Invalid PDF file: {file_path}")
        return ""

    text = ''
    num_pages = 0

    # First, attempt to extract text using PyPDF2
    try:
        reader = await asyncio.to_thread(PdfReader, file_path)
        num_pages = len(reader.pages)
        for page in reader.pages:
            # FIX: extract each page once (the original called
            # page.extract_text() twice per page).
            page_text = page.extract_text()
            if page_text:
                text += page_text + ' '
        # If text was extracted successfully and it's deemed sufficient, return it
        if text and not should_use_ocr(text, num_pages):
            return clean_text(text)
    except Exception as e:
        ERR(f"Error extracting text with PyPDF2: {str(e)}")

    # If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six
    try:
        text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
        # FIX: max(num_pages, 1) guards the words-per-page division when
        # PyPDF2 never determined a page count (num_pages would be 0).
        if text_pdfminer and not should_use_ocr(text_pdfminer, max(num_pages, 1)):
            return clean_text(text_pdfminer)
    except Exception as e:
        ERR(f"Error extracting text with pdfminer.six: {e}")

    # If both methods fail or are deemed insufficient, use OCR as the last resort
    INFO("Falling back to OCR for text extraction...")
    return await ocr_pdf(file_path)
async def is_valid_pdf(file_path: str) -> bool:
    """Check if the file at file_path is a valid PDF."""
    try:
        kind = filetype.guess(file_path)
        # BUG FIX: filetype.guess returns None for unrecognized content;
        # the original then raised AttributeError (caught below, but logged
        # as a spurious error). Treat "unknown type" as simply not-a-PDF.
        return kind is not None and kind.mime == 'application/pdf'
    except Exception as e:
        ERR(f"Error checking file type: {e}")
        return False
async def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from a PDF: PyPDF2 first, then pdfminer.six, then OCR.

    NOTE(review): this redefinition shadows the earlier
    extract_text_from_pdf above and is the one actually called at runtime.

    Returns '' when the file is not a valid PDF or every method fails.
    """
    if not await is_valid_pdf(file_path):
        ERR(f"Invalid PDF file: {file_path}")
        return ""

    text = ''
    try:
        reader = await asyncio.to_thread(PdfReader, file_path)
        for page in reader.pages:
            # FIX: extract each page once (the original called
            # page.extract_text() twice per page).
            page_text = page.extract_text()
            if page_text:
                text += page_text + ' '
        if text.strip():  # Successfully extracted text
            return clean_text(text)
    except Exception as e:
        ERR(f"Error extracting text with PyPDF2: {str(e)}")

    try:
        text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
        if text_pdfminer.strip():  # Successfully extracted text
            return clean_text(text_pdfminer)
    except Exception as e:
        ERR(f"Error extracting text with pdfminer.six: {str(e)}")

    # Fall back to OCR
    INFO("Falling back to OCR for text extraction...")
    try:
        # FIX: rasterize in a worker thread (the original called
        # convert_from_path synchronously inside this async function,
        # blocking the event loop, unlike ocr_pdf above).
        images = await asyncio.to_thread(convert_from_path, file_path)
        ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images))
        return ' '.join(ocr_texts).strip()
    except Exception as e:
        ERR(f"OCR failed: {str(e)}")
        return ""
async def extract_text_from_docx(file_path: str) -> str:
    """Extract all paragraph text from a .docx file, joined by newlines.

    The blocking python-docx parse is offloaded to a worker thread.
    """
    def _read(path):
        document = Document(path)
        return '\n'.join(paragraph.text for paragraph in document.paragraphs)
    return await asyncio.to_thread(_read, file_path)
# Correcting read_text_file to be asynchronous
async def read_text_file(file_path: str) -> str:
# This opens and reads a file asynchronously by offloading to a separate thread
return await asyncio.to_thread(_sync_read_text_file, file_path)
def _sync_read_text_file(file_path: str) -> str:
# Actual synchronous file reading operation
with open(file_path, 'r', encoding='utf-8') as file:
return file.read()
def should_use_ocr(text, num_pages) -> bool:
    """Heuristic: OCR is warranted when extraction produced no text, or
    fewer than 10 words per page on average.

    BUG FIX: the original divided by num_pages unguarded and raised
    ZeroDivisionError when the page count was 0.
    """
    if not text:
        return True  # No text was extracted, use OCR
    if num_pages <= 0:
        return True  # No usable page count to judge by; be conservative
    avg_words_per_page = len(text.split()) / num_pages
    return avg_words_per_page < 10
def convert_to_unix_time(iso_date_str):
    """Parse an ISO-style datetime string (timezone offsets supported by
    dateutil) and return its Unix epoch timestamp as an int."""
    parsed = parser.parse(iso_date_str)
    return int(parsed.timestamp())
async def get_db_connection():
    """Open and return a new asyncpg connection using the module-level
    DB_* settings. The caller is responsible for closing it."""
    return await asyncpg.connect(
        database=DB,
        user=DB_USER,
        password=DB_PASS,
        host=DB_HOST,
        port=DB_PORT,
    )
temp = """
def get_db_connection_ssh(ssh: bool = True):
if ssh:
with SSHTunnelForwarder(
(DB_SSH, 22),
DB_SSH_USER=DB_SSH_USER,
DB_SSH_PASS=DB_SSH_PASS,
remote_bind_address=DB_SSH,
local_bind_address=(DB_HOST, DB_PORT)
) as tunnel: conn = psycopg2.connect(
dbname=DB,
user=DB_USER,
password=DB_PASS,
host=DB_HOST,
port=DB_PORT
)
else:
conn = psycopg2.connect(
dbname=DB,
user=DB_USER,
password=DB_PASS,
host=DB_HOST,
port=DB_PORT
)
return conn
"""
def db_localized():
    """Return a database connection.

    NOTE(review): get_db_connection() is an async coroutine function, so
    this synchronous wrapper returns a coroutine object that the caller
    must await -- confirm callers actually do so.
    """
    # ssh = True if TS_IP == DB_SSH else False
    return get_db_connection()
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometers between two points given in
    decimal degrees (haversine formula, Earth radius 6371 km)."""
    EARTH_RADIUS_KM = 6371
    phi1, lam1, phi2, lam2 = (math.radians(v) for v in (lat1, lon1, lat2, lon2))
    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2
    chord = math.sin(half_dphi) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(half_dlam) ** 2
    return 2 * EARTH_RADIUS_KM * math.asin(math.sqrt(chord))
def convert_degrees_to_cardinal(d):
    """
    Convert a bearing in degrees to one of the 16 compass directions.
    """
    compass = ("N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE",
               "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW")
    sector_width = 360.0 / len(compass)
    # Round to the nearest sector; modulo wraps 360° back to "N".
    sector = round(d / sector_width) % len(compass)
    return compass[sector]
def localize_dt(dt):
    """Coerce *dt* (str or datetime) into a timezone-aware datetime.

    Strings are parsed with dateutil; naive datetimes get TZ attached;
    aware datetimes are returned unchanged.

    Raises:
        TypeError: when the input is neither a string nor a datetime, or
            when string parsing fails (all failures are re-raised as
            TypeError by the except block below).
    """
    initial_dt = dt
    try:
        if isinstance(dt, str):
            dt = dateutil_parse(dt)
            DEBUG(f"{initial_dt} was a string so we attempted converting to datetime. Result: {dt}")
        if isinstance(dt, datetime):
            DEBUG(f"{dt} is a datetime object, so we will ensure it is tz-aware.")
            if dt.tzinfo is None:
                # NOTE(review): if TZ is a pytz timezone, replace(tzinfo=TZ)
                # can attach an LMT-style offset; pytz recommends
                # TZ.localize(dt). Confirm TZ's type before changing.
                dt = dt.replace(tzinfo=TZ)
                # DEBUG(f"{dt} should now be tz-aware. Returning it now.")
                return dt
            else:
                # DEBUG(f"{dt} already was tz-aware. Returning it now.")
                return dt
        else:
            ERR(f"Conversion failed")
            raise TypeError("Conversion failed")
    except Exception as e:
        ERR(f"Error parsing datetime: {e}")
        raise TypeError("Input must be a string or datetime object")
# 24-hour time strings keyed by the 2-hour-interval column labels used in
# the hourly weather tables; convert_to_12_hour_format does reverse lookups.
HOURLY_COLUMNS_MAPPING = {
    "12am": "00:00:00",
    "2am": "02:00:00",
    "4am": "04:00:00",
    "6am": "06:00:00",
    "8am": "08:00:00",
    "10am": "10:00:00",
    "12pm": "12:00:00",
    "2pm": "14:00:00",
    "4pm": "16:00:00",
    "6pm": "18:00:00",
    "8pm": "20:00:00",
    "10pm": "22:00:00",
}

def convert_to_12_hour_format(datetime_obj_or_str):
    """Map a datetime, datetime.time, or string to its 2-hour column label
    ('2pm', '10am', ...).

    Accepts a datetime object, a datetime.time, a 'YYYY-MM-DD HH:MM:SS'
    string, or an 'HH:MM:SS' string. Returns 'Invalid datetime string
    format' for unparseable strings and 'Invalid time' for times that are
    not one of the mapped 2-hour marks.

    BUG FIX: for successfully parsed 'YYYY-MM-DD HH:MM:SS' strings the
    original looked up the *full original string* in the mapping and thus
    always returned 'Invalid time'; the time portion is now used.
    """
    if isinstance(datetime_obj_or_str, time):
        time24 = datetime_obj_or_str.strftime("%H:%M:%S")
    elif isinstance(datetime_obj_or_str, str):
        try:
            parsed = datetime.strptime(datetime_obj_or_str, "%Y-%m-%d %H:%M:%S")
            time24 = parsed.strftime("%H:%M:%S")
        except ValueError:
            try:
                datetime.strptime(datetime_obj_or_str, "%H:%M:%S")
                time24 = datetime_obj_or_str
            except ValueError:
                return "Invalid datetime string format"
    else:
        time24 = datetime_obj_or_str.strftime("%H:%M:%S")
    reverse_mapping = {v: k for k, v in HOURLY_COLUMNS_MAPPING.items()}
    return reverse_mapping.get(time24, "Invalid time")
def encode_image_to_base64(image_path):
    """Load an image from disk and return it as a base64-encoded JPEG string.

    Returns None (after logging) when the file does not exist.
    """
    if not os.path.exists(image_path):
        DEBUG(f"Error: File does not exist at {image_path}")
        return None
    with Image.open(image_path) as image:
        # BUG FIX: JPEG cannot encode alpha/palette modes (e.g. RGBA or P
        # PNGs) -- Pillow raises OSError on save. Convert to RGB first.
        if image.mode not in ("RGB", "L"):
            image = image.convert("RGB")
        output_buffer = BytesIO()
        image.save(output_buffer, format='JPEG')
        byte_data = output_buffer.getvalue()
    return base64.b64encode(byte_data).decode('utf-8')
def resize_and_convert_image(image_path, max_size=2160, quality=80):
    """Downscale an image so its longest side is at most *max_size* px and
    return it as JPEG-encoded bytes.

    BUG FIXES vs the original:
    - the scale ratio is clamped to 1.0, so images already smaller than
      max_size are no longer blurrily upscaled;
    - alpha/palette modes (RGBA, P) are converted to RGB, since Pillow
      raises OSError when saving them as JPEG.
    """
    with Image.open(image_path) as img:
        ratio = min(1.0, max_size / max(img.size))
        if ratio < 1.0:
            new_size = tuple(int(dim * ratio) for dim in img.size)
            img = img.resize(new_size, Image.Resampling.LANCZOS)
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")
        buffer = io.BytesIO()
        img.save(buffer, format='JPEG', quality=quality)
        return buffer.getvalue()