Initial commit
This commit is contained in:
commit
0fdd15fc72
20 changed files with 5606 additions and 0 deletions
56
.gitignore
vendored
Normal file
56
.gitignore
vendored
Normal file
|
@ -0,0 +1,56 @@
|
|||
# Ignore specific data files and directories
|
||||
sijapi/data/calendar.ics
|
||||
sijapi/data/asr/
|
||||
sijapi/data/geocoder/
|
||||
sijapi/data/courtlistener/
|
||||
sijapi/data/tts/
|
||||
sijapi/data/db/
|
||||
sijapi/data/sd/workflows/private
|
||||
sijapi/data/*.pbf
|
||||
sijapi/data/geonames.txt
|
||||
sijapi/data/sd/images/
|
||||
sijapi/config/O365/
|
||||
sijapi/local_only/
|
||||
sijapi/testbed/
|
||||
|
||||
**/.env
|
||||
**/.config.yaml
|
||||
**/*.log
|
||||
**/logs/
|
||||
**/__pycache__
|
||||
**/.DS_Store
|
||||
**/*.ics
|
||||
**/*.sqlite
|
||||
**/private/
|
||||
**/*sync-conflict*.*
|
||||
**/*.db
|
||||
**/*.mp3
|
||||
**/*.mp4
|
||||
**/*.wav
|
||||
**/*.pyc
|
||||
**/.ipynb_checkpoints/
|
||||
venv/
|
||||
env/
|
||||
.venv/
|
||||
.vscode/
|
||||
.idea/
|
||||
*~
|
||||
*.swp
|
||||
*.swo
|
||||
*.com
|
||||
*.class
|
||||
*.dll
|
||||
*.exe
|
||||
*.o
|
||||
*.so
|
||||
*.7z
|
||||
*.dmg
|
||||
*.gz
|
||||
*.iso
|
||||
*.jar
|
||||
*.rar
|
||||
*.tar
|
||||
*.zip
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
sijapi/testbed/
|
17
README.md
Normal file
17
README.md
Normal file
|
@ -0,0 +1,17 @@
|
|||
```
|
||||
.x+=:. . .
|
||||
z` ^% @88> .. @88>
|
||||
. <k %8P 888> .d`` %8P
|
||||
.@8Ned8" . "8P u @8Ne. .u .
|
||||
.@^%8888" .@88u . us888u. %8888:u@88N .@88u
|
||||
x88: `)8b. ''888E` u888u. .@88 "8888" `888I 888. ''888E`
|
||||
8888N=*8888 888E `'888E 9888 9888 888I 888I 888E
|
||||
%8" R88 888E 888E 9888 9888 888I 888I 888E
|
||||
@8Wou 9% 888E 888E 9888 9888 uW888L 888' 888E
|
||||
.888888P` 888& 888E 9888 9888 '*88888Nu88P 888&
|
||||
` ^"F R888" 888E "888*""888"~ '88888F` R888"
|
||||
"" 888E ^Y" ^Y' 888 ^ ""
|
||||
888E *8E
|
||||
888P '8>
|
||||
.J88" "
|
||||
```
|
47
requirements.txt
Normal file
47
requirements.txt
Normal file
|
@ -0,0 +1,47 @@
|
|||
python-dotenv
|
||||
setuptools
|
||||
PyPDF2
|
||||
fastapi
|
||||
pdf2image
|
||||
pdfminer
|
||||
pytesseract
|
||||
python-dateutil
|
||||
python-docx
|
||||
hypercorn
|
||||
starlette
|
||||
httpx
|
||||
pydantic
|
||||
pytz
|
||||
requests
|
||||
aiohttp
|
||||
paramiko
|
||||
tailscale
|
||||
pandas
|
||||
pydub
|
||||
torch
|
||||
selenium
|
||||
webdriver_manager
|
||||
faster_whisper
|
||||
filetype
|
||||
html2text
|
||||
markdown
|
||||
ollama
|
||||
aiofiles
|
||||
bs4
|
||||
imbox
|
||||
newspaper3k
|
||||
python-magic
|
||||
urllib3
|
||||
whisper
|
||||
huggingface_hub
|
||||
numpy
|
||||
tqdm
|
||||
tiktoken
|
||||
numba
|
||||
scipy
|
||||
vectordb
|
||||
IPython
|
||||
torchaudio
|
||||
lxml
|
||||
lxml_html_clean
|
||||
pdfminer.six
|
61
setup.py
Normal file
61
setup.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
from setuptools import setup, find_packages
|
||||
|
||||
setup(
|
||||
name='sijapi',
|
||||
version='0.1',
|
||||
packages=find_packages(),
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'sijapi = sijapi.__main__:main',
|
||||
],
|
||||
},
|
||||
install_requires=[
|
||||
'fastapi',
|
||||
'python-dotenv',
|
||||
'hypercorn',
|
||||
'setuptools',
|
||||
'PyPDF2',
|
||||
'pdf2image',
|
||||
'pdfminer',
|
||||
'pytesseract',
|
||||
'python-dateutil',
|
||||
'python-docx',
|
||||
'starlette',
|
||||
'httpx',
|
||||
'pydantic',
|
||||
'pytz',
|
||||
'requests',
|
||||
'aiohttp',
|
||||
'paramiko',
|
||||
'tailscale',
|
||||
'pandas',
|
||||
'pydub',
|
||||
'torch',
|
||||
'selenium',
|
||||
'webdriver_manager',
|
||||
'faster_whisper',
|
||||
'filetype',
|
||||
'html2text',
|
||||
'markdown',
|
||||
'ollama',
|
||||
'aiofiles',
|
||||
'bs4',
|
||||
'pdfminer.six',
|
||||
'lxml_html_clean',
|
||||
'imbox',
|
||||
'newspaper3k',
|
||||
'python-magic',
|
||||
'urllib3',
|
||||
'whisper',
|
||||
'huggingface_hub',
|
||||
'numpy',
|
||||
'tqdm',
|
||||
'tiktoken',
|
||||
'numba',
|
||||
'scipy',
|
||||
'vectordb',
|
||||
'IPython',
|
||||
'torchaudio'
|
||||
],
|
||||
)
|
||||
|
92
sij.asc
Normal file
92
sij.asc
Normal file
|
@ -0,0 +1,92 @@
|
|||
-----BEGIN PGP PUBLIC KEY BLOCK-----
|
||||
|
||||
mQINBGY+fL4BEADCpz8FAfa6/7i9mEQCYlwwP2k9DlrUzz+u9BL4BmuoTEcGty9M
|
||||
7EA2ivRxXo371IIMjL/GyAa8I3WHMEhxuRlGldUQaHzo6PicTn+OiLJ/g2vCfStN
|
||||
jIYog3WC25P7Es1n1hDuOu8rUL93twXZ4NevgYx+G44M7Q+/1AbSXf83kpawlHhg
|
||||
HcGmH2vt9UulfTGAvN9s2sH2pn89812lpWLSdPARNw09ePZy4RdiEgJ6t+S+wjaE
|
||||
Ue/H4FcQC1MLrQnkW5soUOduY9HN0iUk/xZqqkRQctl3ds5oInE483vQsL0HKFvs
|
||||
MB8lBdXTbVzxvpFe+fvT8d6hiZ/YgxIUEl1KZLDd3atqj+UREuG+LABZUKC4nSUP
|
||||
EXneXUqi4qVCW9827K9/H+IKahe8OE+OrZAsSfLtsp4AznIxgyQbvpUZzCuRASJN
|
||||
Kt1cjcJBOv5L0HJ8tVykZd23WuKUXiyxTs1MxsDGyjew30IsAg4WNO/iw9vBO/Yu
|
||||
pfjlZTcgbghdIuNmOrnCyzKWtUxxfDtWwEBBshbTKusOaGhauBaHrRVE7lKlTblM
|
||||
x1JIzYBziDmFy25J1XvYb3guilk1yy54poLQaEcE54mQYWHKCNS4eQeL5dJR3Nmu
|
||||
Pt9GXdMyNO3uyog3WYpyYqch+osbBsHFVNUClxMycnyqZzHQeZHPNJBzJwARAQAB
|
||||
tC5TYW5neWUgSW5jZS1Kb2hhbm5zZW4gKEF0dG9ybmV5KSA8c2lqQHNpai5sYXc+
|
||||
iQJXBBMBCABBAhsDBQkHhh8tBQsJCAcCBhUKCQgLAgQWAgMBAh4BAheAFiEEMjqK
|
||||
LEezdiJLNhO3U1smWu2+W0QFAmY+fPUCGQEACgkQU1smWu2+W0SwBQ/+L5S1fIop
|
||||
6iQ/6gQENBNCUVgACWP0/ViJzQGo4iF3UZkV5KV8pgk/TenZSXCLxUj6UpSAe25m
|
||||
vtrGV4NCL2hLn1NPK11Na6IM1ykfh/L67NKeCqmtQYwNLwW0o0fvUpK9fahPxhmv
|
||||
EFo+lVCabQndgzmLxnUhxH4qkGSejsaSFoJQ6fVl/DExCL4w/R5rStnRMKDtkuF1
|
||||
ONfjZpuLrAylx8Ypf/rocQYn5AJcRD5ZL2bGgDZNe85VNBFmD3b2cGSVpm3J6Rg/
|
||||
fPfs1lgtpgXWbBDCF8nRY326Utbr3qoeZUXVQjVZ05Q2SpUYFHiDZJ3EFwQikg5n
|
||||
cIBfcXQZQhTq/OK0eS0vB1li8m1ce9m8iMC+Pxe5toPkxFV5RO1+o5PG1SyOfzfV
|
||||
F1c0O9JQqdJzRHoTuqLtVhlmRVBU2d6TjWYlZ6TwPShSTLu0Tkm4EeFJS4oag75d
|
||||
q7LlIIvrWS4n3CqVpC/PEIUtclytkOkvNQaSWHEVkappS3UjkX1BJmaI8zXYh9jh
|
||||
sV/5FckvwYnky+w6geFOBs34NW0rg9oNw4KNAywYcOPbI/Ev1z57my+MpA5msw+B
|
||||
ww9sFC+tzQCSJl0FU2Dg2YMnyqfUtGr9HfXdAGuuUVh+cYFmEdwwZqBWl37pNIGL
|
||||
SxfF1AdrlHCSpJcLVETe80UraMFAI7tyOwe0L1Nhbmd5ZSBJbmNlLUpvaGFubnNl
|
||||
biA8c2FuZ3llaWpAd2VzdGVybmxhdy5vcmc+iQJUBBMBCAA+FiEEMjqKLEezdiJL
|
||||
NhO3U1smWu2+W0QFAmY+fOgCGwMFCQeGHy0FCwkIBwIGFQoJCAsCBBYCAwECHgEC
|
||||
F4AACgkQU1smWu2+W0RlnBAArwaFta9NTRdubTqctv1EET1D9OXAE/R5vdSk2jRQ
|
||||
1CMYmv6KeMm0Rl7+dNFet/vJOEtITF7TZHnt7WBy7n5m+SIoARsaZYEchjZKsE2g
|
||||
6RvRWqFGYuUYQWTRKsw0b2tT16BaNLKdV/w3ndRQNS6wDJrW1dRnIWxm4z26d3/H
|
||||
Rt3o8+LUVxdSWGLliKZU00S+FNPVSwWe/X7+CoIE7T5XZL+OIEJ6DfpK2pkHKT6D
|
||||
FswF3KOLG36vz5eISk4AT+o9AEoFIpX0hce3DMixEYQSgKN230K8RchC59bO81zE
|
||||
w7Mic4vpn/wKFhicn+0BA1aJzzOd8iEwiA0p5baq4b2xIwCBiO4uv/HXR1SN1Tfk
|
||||
QozjAGzl8LzrmwGTWOtOSk/7ckPhPR2MGNhMdtJ7rPeHxImJLh+/f4uBmYnQUdw4
|
||||
0j3sMpJmrShW5dXJ8YHqVFfqabYD8HkBztdYI0qGJDpQjEbW6V+DvMWQXOZ8c1ul
|
||||
NN2vZyY25RkypMQLiphImJa+q6eGtBEas40MeAkgQKIBPBBpb6W1km+m6UnOADKB
|
||||
0/vOWcZMgijyMPp7WvwXbOwmXI27rHsUTvhFDLPI113a9I5bU8j6VyW2s/sst3Xc
|
||||
OQDzEgR3KvD4dWjczIg6yliIq9eM5hskpsYyfDfWRWrIbR3Tg8XPwnQRB9dPEHIy
|
||||
rKS0KVNhbmd5ZSBJbmNlLUpvaGFubnNlbiA8c2FuZ3llQHJpc2V1cC5uZXQ+iQJU
|
||||
BBMBCAA+FiEEMjqKLEezdiJLNhO3U1smWu2+W0QFAmY+fQYCGwMFCQeGHy0FCwkI
|
||||
BwIGFQoJCAsCBBYCAwECHgECF4AACgkQU1smWu2+W0SKGA//VRGpS7IwOOlHF7OI
|
||||
+LEMDebLpLB2PswfWrK+sI9YdXXV/CaT0NcCz2HPCoK+coaDkl4cxh10ykVbjO36
|
||||
wZc/rvhpzga2wMLpBLNwpTvSlfMwsCQeRQay498bgdR59gf1hYa/dPYKKrBgNxHa
|
||||
Kc3dMDWU0adpV4zV1s/iFNQQZfmhUah+8TTlB03hahPzn8V7CqQF+jTfSXiWPv/V
|
||||
eD1W6Sc1juvLTVxTThbM5ewiIhMP2t7KM+M4viOEqce79IcE2HTcpCaEI7Lh/Eld
|
||||
9VBZZk/gENuPqyQuLbOIOQhC6LYRZkZC9Vv1FDutfWV5ZBPyaTY/n5pGW3lo+Tfa
|
||||
FLSamQcD6dyiGm/ZyQbPUDt2aWhqRGr7VvvtfyXLazL9T9Y6ASr5UjLakPr5ihUz
|
||||
B8InRch9ACPbu7QSIGFk9PQgHme2Cd/HMRLIALnkAmrafgDE+14Rlp9qI2nYhWdD
|
||||
jkZcLalPXQCDBxUfj1q192Nn3wlKsDkDd2RWT7Mc2RJq2FR36KADPMtz2oJPSib4
|
||||
eRgI40E9Wv+zqHDDTU2K/bLi3nmBHvKnXWXPyiBPVL+CAoAhkYHHJwNuRQfxlukq
|
||||
heS4/CMBRB04foTeu2ltl6/sQdAIyBGKbOC6fMyhJFYbi16nWI6j7iw2XQnqyitu
|
||||
jC8Pz14NfIAQTpKCVcV32Kn2k1+0I1Nhbmd5ZSBJbmNlLUpvaGFubnNlbiA8c2lq
|
||||
QGVudi5lc3E+iQJUBBMBCAA+FiEEMjqKLEezdiJLNhO3U1smWu2+W0QFAmY+fRIC
|
||||
GwMFCQeGHy0FCwkIBwIGFQoJCAsCBBYCAwECHgECF4AACgkQU1smWu2+W0Rbxw/+
|
||||
OMYnlyXvo146+3M6JGdvW36CWmc9ZcmaU+xJM3FnG91WNo5J8MnHl0Ks9BwjNWtm
|
||||
VJgFEdi2EVpSLJnYdQyJILCNt8RAclYvbFHYUOIDEEC2yr5ZKt/odwYAXPxaqQ4O
|
||||
Sj7R2GbLA52O8zGWfARBAnAQycrlBRjItdpzGeWgRST8O/ot/IkU7xsAKW72E2VB
|
||||
9jlCahp5c01lEideVqzVhk3z6GzVz1NUKsglgEOmTIjld4mMs+4GX/93q0u1erKO
|
||||
I7Q6RL6lfdc2opGi5jFMXGWhLLgX2SSsBFJRuSQGnTpbx3XWFS5uA+cku7Fh0fC0
|
||||
MKr2vsY18Z6OqU0MdQm6ovIVcvhzIdGfnBU9Ct98DMiUhDCmx3o9XneWj1n7kWKM
|
||||
gT8s8AvE27tidtkZApwIKHdUy6qfyqwRjxE+KdL6Eh48x3TVYep+wfSfPJ1eq9Ne
|
||||
7WWXKUx6FGNH01hpQdTLbCYqmwMa03och1wwyi+0wc8rHe6k6y2tURtP3mINkDeV
|
||||
u1QmVaGRDA2r7oDm9UsFeupGsbFBnTkQIfJgnrLRJFfN2FDJPZDcd/VS71AOSL5C
|
||||
jY+Dr/WHYPWeN8MHXfG4r/P41wsrnAJEAzSvLRQ9GYCLPe825W+uDJx9eMePodFa
|
||||
BeIBcM633WXpbIXHnRQhPDfTzejCejO6GoPE7PbtBBi5Ag0EZj58vgEQAPUqNOgQ
|
||||
kAPd/S+nkGAfvnzC5UD6lVWaQTVL/xU2K1J8l11K5Ck4nq7oMKhzEitu0lA+jG7q
|
||||
JVwXMj9+rnoGlbIYmmxCZYpSit930Mss3HjYU8IAF4nybGwc5+wO77cldk3WJTI0
|
||||
EkFgiM4Jk6Gk/tRf1LgMIfJIUgm8MooPLqg2z5Pj+bbwxw42A20enEwtF3ivEETJ
|
||||
wuJwsp5uCOAfzOGqqBvp19PMTPynUBuwEXCkJfb0CCz+5yhjoi6ZjCVXxjuoe2wN
|
||||
jFwoYd8odfSuvC6Fh9qqXnjF7HZLxEyN7K1L/y/sWarsN01zbUUI3kZlnTuamDu4
|
||||
LdZtl2q3QqDyxmzHIWLTa1qL0s3WooB7JJqBYaNmQjLHadoktZ4vfhl7kjXYsg+i
|
||||
84oipL83u2cRHplpqnRk9qVwNdW01EObjNafWY6t3942sM4e/yOdQiaXlxivPuHV
|
||||
VYwme6K53lmGcV3ipMWRpNkme+oKV/TdYTTdlDaLgC8ga5AW6poNoSp5UpNeOs0E
|
||||
mxIZivpRQSCr3g+jScy0RdX/+tI1gWe+2ZIHFwR+1WsXvLXHyd1wVyH4vDxSf1bE
|
||||
VRVsXLZDT/xMGDzNzAC76kzoIykrcndFiTbNzB/LjZJuls6fRdN07bTcymWEKYiP
|
||||
Ia6iGdag6+ueoX4eDzbjCvldKtkfr/EhB7MfABEBAAGJAjwEGAEIACYWIQQyOoos
|
||||
R7N2Iks2E7dTWyZa7b5bRAUCZj58vgIbDAUJB4YfLQAKCRBTWyZa7b5bRLZdEACk
|
||||
AaXNVeywC9+X6bdwkKV5Jl6Hv238cGd58TuVbjd+tii1JazbKEqCAr5tTlGtrUZg
|
||||
fyjM0z5sMKDSZ15paX4xDbDs+xdfMxLVdjmFlZgwTrrTSIx3ODxPo/sSeyrzGZrQ
|
||||
hlZjOHP1Bvln0OTQwK0yE3Eaip0FhIpJA5FX3yrZfvza3St5leNOXsZgEri68cgf
|
||||
mVhS9tBD2I9TpCVwgq5vRnloAMgtQBYr8N9glXBfs2WsPhU96HSSH88osJW+lCkG
|
||||
vTtzQBEjnnSQ/ssHBYz4DfpsJe1fbM+9WVow6q2nkUhqg5TfdAt4H0ra2uPXnNz8
|
||||
lvQObVHlw7T0w5UTzgBdlCyYplyTG2gcZi+UWzit6YH9DH82j1otcq3+3NlrKwo0
|
||||
TSJKZNagiqgJNZ1mhJQTt3JDacFFkBBxLf6trruuyInRU1leo87hzHCxIlMbQPqh
|
||||
ogtV+W9FHElVJwoTQi8YF+0AacZPzK8wJmlPLxBeqs+ULJ8H5wZxlEBB1Jj91/W9
|
||||
6R8m2IUZCsXNNpYU+f7uB8x0RUS3pU8S7GcwdJmOa16Xc4VdfWugm4TTEtajeSYC
|
||||
ek5j/2s/QkAum5slT2Y6Aam0Jj/IhsGHKVEnR6DS01mZqVeeu0giPFUO4ZX5C0n9
|
||||
mAmw/ZUGIOj6ls3KMBHv4pqQI7nd00tW8eIMgKGgKQ==
|
||||
=PhPl
|
||||
-----END PGP PUBLIC KEY BLOCK-----
|
251
sijapi/__init__.py
Normal file
251
sijapi/__init__.py
Normal file
|
@ -0,0 +1,251 @@
|
|||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
import ipaddress
|
||||
import multiprocessing
|
||||
from dotenv import load_dotenv
|
||||
from dateutil import tz
|
||||
from pathlib import Path
|
||||
from pydantic import BaseModel
|
||||
import traceback
|
||||
import logging
|
||||
from .logs import Logger
|
||||
|
||||
|
||||
# from sijapi.config.config import load_config
|
||||
# cfg = load_config()
|
||||
|
||||
### Initial initialization
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
CONFIG_DIR = BASE_DIR / "config"
|
||||
ENV_PATH = CONFIG_DIR / ".env"
|
||||
LOGS_DIR = BASE_DIR / "logs"
|
||||
|
||||
# Create logger instance
|
||||
package_logger = Logger(__name__, LOGS_DIR)
|
||||
LOGGER = package_logger.get_logger()
|
||||
|
||||
def DEBUG(log_message):
    """Log *log_message* at DEBUG level on the package logger."""
    LOGGER.debug(log_message)


def INFO(log_message):
    """Log *log_message* at INFO level on the package logger."""
    LOGGER.info(log_message)


def WARN(log_message):
    """Log *log_message* at WARNING level on the package logger."""
    LOGGER.warning(log_message)


def _active_traceback():
    """Return the formatted traceback of the exception being handled, or None.

    ``traceback.format_exc()`` returns the literal text "NoneType: None" when
    it is called outside an ``except`` block, so check for an active exception
    first instead of logging that noise.
    """
    import sys  # local import: this module does not import sys at top level

    if sys.exc_info()[0] is None:
        return None
    return traceback.format_exc()


def ERR(log_message):
    """Log *log_message* at ERROR level, followed by the active traceback if any."""
    LOGGER.error(log_message)
    trace = _active_traceback()
    if trace is not None:
        LOGGER.error(trace)


def CRITICAL(log_message):
    """Log *log_message* at CRITICAL level, followed by the active traceback if any."""
    LOGGER.critical(log_message)
    trace = _active_traceback()
    if trace is not None:
        LOGGER.critical(trace)
|
||||
|
||||
os.makedirs(LOGS_DIR, exist_ok=True)
|
||||
load_dotenv(ENV_PATH)
|
||||
|
||||
### API essentials
|
||||
ROUTERS = os.getenv('ROUTERS', '').split(',')
|
||||
PUBLIC_SERVICES = os.getenv('PUBLIC_SERVICES', '').split(',')
|
||||
GLOBAL_API_KEY = os.getenv("GLOBAL_API_KEY")
|
||||
# HOST_NET and HOST_PORT comprise HOST, which is what the server will bind to
|
||||
HOST_NET = os.getenv("HOST_NET", "127.0.0.1")
|
||||
HOST_PORT = int(os.getenv("HOST_PORT", 4444))
|
||||
HOST = f"{HOST_NET}:{HOST_PORT}"
|
||||
BASE_URL = os.getenv("BASE_URL", f"http://{HOST}")
|
||||
LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
|
||||
SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
|
||||
TRUSTED_SUBNETS = [ipaddress.ip_network(subnet.strip()) for subnet in os.getenv('TRUSTED_SUBNETS', '127.0.0.1/32').split(',')]
|
||||
MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count())
|
||||
|
||||
### Directories & general paths
|
||||
HOME_DIR = Path.home()
|
||||
ROUTER_DIR = BASE_DIR / "routers"
|
||||
DATA_DIR = BASE_DIR / "data"
|
||||
os.makedirs(DATA_DIR, exist_ok=True)
|
||||
ALERTS_DIR = DATA_DIR / "alerts"
|
||||
os.makedirs(ALERTS_DIR, exist_ok=True)
|
||||
REQUESTS_DIR = LOGS_DIR / "requests"
|
||||
os.makedirs(REQUESTS_DIR, exist_ok=True)
|
||||
REQUESTS_LOG_PATH = LOGS_DIR / "requests.log"
|
||||
|
||||
|
||||
### Databases
|
||||
DB = os.getenv("DB", 'sijdb')
|
||||
DB_HOST = os.getenv("DB_HOST", "127.0.0.1")
|
||||
DB_PORT = os.getenv("DB_PORT", 5432)
|
||||
DB_USER = os.getenv("DB_USER", 'sij')
|
||||
DB_PASS = os.getenv("DB_PASS")
|
||||
DB_SSH = os.getenv("DB_SSH", "100.64.64.15")
|
||||
DB_SSH_USER = os.getenv("DB_SSH_USER")
|
||||
# Read from DB_SSH_PASS, matching the naming of the other DB_* settings; the
# previously used DB_SSH_ENV name is kept as a fallback for existing .env files.
DB_SSH_PASS = os.getenv("DB_SSH_PASS") or os.getenv("DB_SSH_ENV")
|
||||
DB_URL = f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB}'
|
||||
|
||||
|
||||
### LOCATE AND WEATHER LOCALIZATIONS
|
||||
USER_FULLNAME = os.getenv('USER_FULLNAME')
|
||||
USER_BIO = os.getenv('USER_BIO')
|
||||
TZ = tz.gettz(os.getenv("TZ", "America/Los_Angeles"))
|
||||
HOME_ZIP = os.getenv("HOME_ZIP") # unimplemented
|
||||
LOCATION_OVERRIDES = DATA_DIR / "loc_overrides.json"
|
||||
LOCATIONS_CSV = DATA_DIR / "US.csv"
|
||||
# DB = DATA_DIR / "weatherlocate.db" # deprecated
|
||||
VISUALCROSSING_BASE_URL = os.getenv("VISUALCROSSING_BASE_URL", "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline")
|
||||
VISUALCROSSING_API_KEY = os.getenv("VISUALCROSSING_API_KEY")
|
||||
|
||||
|
||||
### Obsidian & notes
|
||||
OBSIDIAN_VAULT_DIR = Path(os.getenv("OBSIDIAN_BASE_DIR") or HOME_DIR / "Nextcloud" / "notes")
|
||||
OBSIDIAN_JOURNAL_DIR = OBSIDIAN_VAULT_DIR / "journal"
|
||||
OBSIDIAN_RESOURCES_DIR = "obsidian/resources"
|
||||
OBSIDIAN_BANNER_DIR = f"{OBSIDIAN_RESOURCES_DIR}/banners"
|
||||
os.makedirs(Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_BANNER_DIR, exist_ok=True)
|
||||
OBSIDIAN_BANNER_SCENE = os.getenv("OBSIDIAN_BANNER_SCENE", "wallpaper")
|
||||
OBSIDIAN_CHROMADB_COLLECTION = os.getenv("OBSIDIAN_CHROMADB_COLLECTION", "obsidian")
|
||||
DOC_DIR = DATA_DIR / "docs"
|
||||
os.makedirs(DOC_DIR, exist_ok=True)
|
||||
|
||||
### DATETIME SCHEMA FOR DAILY NOTE FOLDER HIERARCHY FORMATTING ###
|
||||
YEAR_FMT = os.getenv("YEAR_FMT")
|
||||
MONTH_FMT = os.getenv("MONTH_FMT")
|
||||
DAY_FMT = os.getenv("DAY_FMT")
|
||||
DAY_SHORT_FMT = os.getenv("DAY_SHORT_FMT")
|
||||
|
||||
### Large language model
|
||||
LLM_URL = os.getenv("LLM_URL", "http://localhost:11434")
|
||||
LLM_SYS_MSG = os.getenv("SYSTEM_MSG", "You are a helpful AI assistant.")
|
||||
SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
|
||||
SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
|
||||
DEFAULT_LLM = os.getenv("DEFAULT_LLM", "dolphin-mistral")
|
||||
DEFAULT_VISION = os.getenv("DEFAULT_VISION", "llava")
|
||||
DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "Luna")
|
||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
### Stable diffusion
|
||||
SD_IMAGE_DIR = DATA_DIR / "sd" / "images"
|
||||
os.makedirs(SD_IMAGE_DIR, exist_ok=True)
|
||||
SD_WORKFLOWS_DIR = DATA_DIR / "sd" / "workflows"
|
||||
os.makedirs(SD_WORKFLOWS_DIR, exist_ok=True)
|
||||
COMFYUI_URL = os.getenv('COMFYUI_URL', "http://localhost:8188")
|
||||
COMFYUI_DIR = Path(os.getenv('COMFYUI_DIR'))
|
||||
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output'
|
||||
COMFYUI_LAUNCH_CMD = os.getenv('COMFYUI_LAUNCH_CMD', 'mamba activate comfyui && python main.py')
|
||||
SD_CONFIG_PATH = CONFIG_DIR / 'sd.json'
|
||||
with open(SD_CONFIG_PATH, 'r') as SD_CONFIG_file:
|
||||
SD_CONFIG = json.load(SD_CONFIG_file)
|
||||
|
||||
### Summarization
|
||||
SUMMARY_CHUNK_SIZE = int(os.getenv("SUMMARY_CHUNK_SIZE", 4000)) # measured in tokens
|
||||
SUMMARY_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", 100)) # measured in tokens
|
||||
SUMMARY_TPW = float(os.getenv("SUMMARY_TPW", 1.3)) # measured in tokens
|
||||
SUMMARY_LENGTH_RATIO = int(os.getenv("SUMMARY_LENGTH_RATIO", 4)) # measured as original to length ratio
|
||||
SUMMARY_MIN_LENGTH = int(os.getenv("SUMMARY_MIN_LENGTH", 150)) # measured in tokens
|
||||
SUMMARY_INSTRUCT = os.getenv("SUMMARY_INSTRUCT", "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
|
||||
SUMMARY_MODEL = os.getenv("SUMMARY_MODEL", "llama3")
|
||||
SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 4096))
|
||||
|
||||
### ASR
|
||||
ASR_DIR = DATA_DIR / "asr"
|
||||
os.makedirs(ASR_DIR, exist_ok=True)
|
||||
WHISPER_CPP_DIR = HOME_DIR / str(os.getenv("WHISPER_CPP_DIR"))
|
||||
WHISPER_CPP_MODELS = os.getenv('WHISPER_CPP_MODELS', 'NULL,VOID').split(',')
|
||||
|
||||
### TTS
|
||||
PREFERRED_TTS = os.getenv("PREFERRED_TTS", "None")
|
||||
TTS_DIR = DATA_DIR / "tts"
|
||||
os.makedirs(TTS_DIR, exist_ok=True)
|
||||
VOICE_DIR = TTS_DIR / 'voices'
|
||||
os.makedirs(VOICE_DIR, exist_ok=True)
|
||||
PODCAST_DIR = TTS_DIR / "sideloads"
|
||||
os.makedirs(PODCAST_DIR, exist_ok=True)
|
||||
TTS_OUTPUT_DIR = TTS_DIR / 'outputs'
|
||||
os.makedirs(TTS_OUTPUT_DIR, exist_ok=True)
|
||||
TTS_SEGMENTS_DIR = TTS_DIR / 'segments'
|
||||
os.makedirs(TTS_SEGMENTS_DIR, exist_ok=True)
|
||||
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
|
||||
|
||||
### Calendar & email account
|
||||
MS365_TOGGLE = True if os.getenv("MS365_TOGGLE") == "True" else False
|
||||
ICAL_TOGGLE = True if os.getenv("ICAL_TOGGLE") == "True" else False
|
||||
ICS_PATH = DATA_DIR / 'calendar.ics' # deprecated now, but maybe revive?
|
||||
ICALENDARS = os.getenv('ICALENDARS', 'NULL,VOID').split(',')
|
||||
from typing import Optional  # used only by the optional fields of IMAP_DETAILS


class IMAP_DETAILS(BaseModel):
    """Settings for one mail account: credentials, host, and IMAP/SMTP ports."""

    email: str
    password: str
    host: str
    imap_port: int
    smtp_port: int
    # Declared Optional because both default to None and are passed None when
    # the corresponding environment variables are unset; a plain `str`
    # annotation rejects an explicit None under pydantic v2.
    imap_encryption: Optional[str] = None
    smtp_encryption: Optional[str] = None
|
||||
|
||||
IMAP = IMAP_DETAILS(
|
||||
email = os.getenv('IMAP_EMAIL'),
|
||||
password = os.getenv('IMAP_PASSWORD'),
|
||||
host = os.getenv('IMAP_HOST', '127.0.0.1'),
|
||||
imap_port = int(os.getenv('IMAP_PORT', 1143)),
|
||||
smtp_port = int(os.getenv('SMTP_PORT', 469)),
|
||||
imap_encryption = os.getenv('IMAP_ENCRYPTION', None),
|
||||
smtp_encryption = os.getenv('SMTP_ENCRYPTION', None)
|
||||
)
|
||||
AUTORESPONSE_WHITELIST = os.getenv('AUTORESPONSE_WHITELIST', '').split(',')
|
||||
AUTORESPONSE_BLACKLIST = os.getenv('AUTORESPONSE_BLACKLIST', '').split(',')
|
||||
AUTORESPONSE_BLACKLIST.extend(["no-reply@", "noreply@", "@uscourts.gov", "@doi.gov"])
|
||||
AUTORESPONSE_CONTEXT = os.getenv('AUTORESPONSE_CONTEXT', None)
|
||||
# True only when AUTORESPONSE_CONTEXT is configured (identity check, per PEP 8).
AUTORESPOND = AUTORESPONSE_CONTEXT is not None
|
||||
|
||||
### Courtlistener & other webhooks
|
||||
COURTLISTENER_DOCKETS_DIR = DATA_DIR / "courtlistener" / "dockets"
|
||||
os.makedirs(COURTLISTENER_DOCKETS_DIR, exist_ok=True)
|
||||
COURTLISTENER_SEARCH_DIR = DATA_DIR / "courtlistener" / "cases"
|
||||
os.makedirs(COURTLISTENER_SEARCH_DIR, exist_ok=True)
|
||||
CASETABLE_PATH = DATA_DIR / "courtlistener" / "cases.json"
|
||||
COURTLISTENER_API_KEY = os.getenv("COURTLISTENER_API_KEY")
|
||||
COURTLISTENER_BASE_URL = os.getenv("COURTLISTENER_BASE_URL", "https://www.courtlistener.com")
|
||||
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
|
||||
|
||||
### Keys & passwords
|
||||
PUBLIC_KEY_FILE = os.getenv("PUBLIC_KEY_FILE", 'you_public_key.asc')
|
||||
PUBLIC_KEY = (BASE_DIR.parent / PUBLIC_KEY_FILE).read_text()
|
||||
MAC_ID = os.getenv("MAC_ID")
|
||||
MAC_UN = os.getenv("MAC_UN")
|
||||
MAC_PW = os.getenv("MAC_PW")
|
||||
TIMING_API_KEY = os.getenv("TIMING_API_KEY")
|
||||
TIMING_API_URL = os.getenv("TIMING_API_URL", "https://web.timingapp.com/api/v1")
|
||||
PHOTOPRISM_URL = os.getenv("PHOTOPRISM_URL")
|
||||
PHOTOPRISM_USER = os.getenv("PHOTOPRISM_USER")
|
||||
PHOTOPRISM_PASS = os.getenv("PHOTOPRISM_PASS")
|
||||
|
||||
### Tailscale
|
||||
# None when TS_IP is unset: ip_address("NULL") raises ValueError, which would
# abort the import of the whole package. Same pattern as TS_SUBNET.
TS_IP = ipaddress.ip_address(os.getenv("TS_IP")) if os.getenv("TS_IP") else None
|
||||
TS_SUBNET = ipaddress.ip_network(os.getenv("TS_SUBNET")) if os.getenv("TS_SUBNET") else None
|
||||
TS_ID = os.getenv("TS_ID", "NULL")
|
||||
TS_TAILNET = os.getenv("TS_TAILNET", "NULL")
|
||||
TS_ADDRESS = f"http://{TS_ID}.{TS_TAILNET}.ts.net"
|
||||
|
||||
### Cloudflare
|
||||
CF_API_BASE_URL = os.getenv("CF_API_BASE_URL")
|
||||
CF_TOKEN = os.getenv("CF_TOKEN")
|
||||
CF_IP = DATA_DIR / "cf_ip.txt" # to be deprecated soon
|
||||
CF_DOMAINS_PATH = DATA_DIR / "cf_domains.json" # to be deprecated soon
|
||||
|
||||
### Caddy - not fully implemented
|
||||
# Keep the value computed in the "API essentials" section (http://HOST by
# default) instead of resetting BASE_URL to None when the variable is unset.
BASE_URL = os.getenv("BASE_URL", BASE_URL)
|
||||
CADDY_SERVER = os.getenv('CADDY_SERVER', None)
|
||||
CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None
|
||||
CADDY_API_KEY = os.getenv("CADDY_API_KEY")
|
||||
|
||||
|
||||
### Microsoft Graph
|
||||
MS365_CLIENT_ID = os.getenv('MS365_CLIENT_ID')
|
||||
MS365_SECRET = os.getenv('MS365_SECRET')
|
||||
MS365_TENANT_ID = os.getenv('MS365_TENANT_ID')
|
||||
MS365_CERT_PATH = CONFIG_DIR / 'MS365' / '.cert.pem' # deprecated
|
||||
MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated
|
||||
MS365_KEY = MS365_KEY_PATH.read_text()
|
||||
MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt'
|
||||
MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT')
|
||||
|
||||
MS365_LOGIN_URL = os.getenv("MS365_LOGIN_URL", "https://login.microsoftonline.com")
|
||||
MS365_AUTHORITY_URL = f"{MS365_LOGIN_URL}/{MS365_TENANT_ID}"
|
||||
MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect")
|
||||
MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access').split(',')
|
||||
|
||||
### Maintenance
|
||||
GARBAGE_COLLECTION_INTERVAL = 60 * 60 # Run cleanup every hour
|
||||
GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours
|
146
sijapi/__main__.py
Executable file
146
sijapi/__main__.py
Executable file
|
@ -0,0 +1,146 @@
|
|||
#!/Users/sij/miniforge3/envs/api/bin/python
|
||||
from fastapi import FastAPI, Request, HTTPException, Response
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.requests import ClientDisconnect
|
||||
from hypercorn.asyncio import serve
|
||||
from hypercorn.config import Config
|
||||
import sys
|
||||
import asyncio
|
||||
import httpx
|
||||
import argparse
|
||||
import json
|
||||
import ipaddress
|
||||
import importlib
|
||||
from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import argparse
|
||||
from . import LOGGER, LOGS_DIR
|
||||
from .logs import Logger
|
||||
|
||||
parser = argparse.ArgumentParser(description='Personal API.')
|
||||
parser.add_argument('--debug', action='store_true', help='Set log level to INFO')
|
||||
parser.add_argument('--test', type=str, help='Load only the specified module.')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Using the package logger
|
||||
main_logger = Logger("main", LOGS_DIR)
|
||||
main_logger.setup_from_args(args)
|
||||
logger = LOGGER
|
||||
|
||||
# Use the logger
|
||||
logger.debug("Debug Log")
|
||||
logger.info("Info Log")
|
||||
|
||||
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
|
||||
from sijapi import HOST, ENV_PATH, GLOBAL_API_KEY, REQUESTS_DIR, ROUTER_DIR, REQUESTS_LOG_PATH, PUBLIC_SERVICES, TRUSTED_SUBNETS, ROUTERS
|
||||
|
||||
|
||||
# Initialize a FastAPI application
|
||||
api = FastAPI()
|
||||
|
||||
|
||||
# CORSMiddleware
|
||||
api.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=['*'],
|
||||
allow_credentials=True,
|
||||
allow_methods=['*'],
|
||||
allow_headers=['*'],
|
||||
)
|
||||
|
||||
class SimpleAPIKeyMiddleware(BaseHTTPMiddleware):
    """Require the global API key unless the path is public or the client is trusted.

    A request is let through when any of these holds:
      * its method is ``OPTIONS`` (answered directly with an empty 200),
      * its path is listed in ``PUBLIC_SERVICES``,
      * the client address lies inside one of ``TRUSTED_SUBNETS``,
      * it presents ``GLOBAL_API_KEY`` as ``Authorization: Bearer <key>``
        (a bare key in that header is accepted too) or as the ``api_key``
        query parameter.
    Any other request receives a 401 JSON response.
    """

    @staticmethod
    def _bearer_token(header_value):
        """Return the key carried by an Authorization header value, or None.

        Only the "Bearer " prefix is matched case-insensitively; the key itself
        keeps its case so keys containing uppercase characters can match.
        """
        if not header_value:
            return None
        token = header_value.strip()
        if token[:7].lower() == "bearer ":
            token = token[7:].strip()
        return token or None

    @staticmethod
    def _is_trusted(request):
        """Return True when the client address is inside a trusted subnet."""
        client = request.client
        if client is None:
            # No client address available: never treat the caller as trusted.
            return False
        try:
            client_ip = ipaddress.ip_address(client.host)
        except ValueError:
            # Host is not an IP literal; fall back to requiring the API key.
            return False
        return any(client_ip in subnet for subnet in TRUSTED_SUBNETS)

    def _has_valid_key(self, request):
        """Return True when the request carries the configured API key."""
        if not GLOBAL_API_KEY:
            # With no key configured nothing can authenticate; previously a
            # request without any key compared equal to the unset (None) key.
            return False
        supplied = (
            self._bearer_token(request.headers.get("Authorization")),
            request.query_params.get("api_key"),
        )
        return GLOBAL_API_KEY in supplied

    async def dispatch(self, request: Request, call_next):
        """Authorize *request*, then hand it to the next handler."""
        if request.method == "OPTIONS":
            # Allow CORS preflight requests. JSONResponse requires a content
            # argument, so a plain Response is used for the empty reply.
            return Response(status_code=200)

        if request.url.path not in PUBLIC_SERVICES and not self._is_trusted(request):
            if not self._has_valid_key(request):
                ERR("Invalid API key provided by a requester.")
                return JSONResponse(
                    status_code=401,
                    content={"detail": "Invalid or missing API key"},
                )

        return await call_next(request)
|
||||
|
||||
api.add_middleware(SimpleAPIKeyMiddleware)
|
||||
|
||||
canceled_middleware = """
|
||||
@api.middleware("http")
|
||||
async def log_requests(request: Request, call_next):
|
||||
DEBUG(f"Incoming request: {request.method} {request.url}")
|
||||
DEBUG(f"Request headers: {request.headers}")
|
||||
DEBUG(f"Request body: {await request.body()}")
|
||||
response = await call_next(request)
|
||||
return response
|
||||
|
||||
async def log_outgoing_request(request):
|
||||
INFO(f"Outgoing request: {request.method} {request.url}")
|
||||
DEBUG(f"Request headers: {request.headers}")
|
||||
DEBUG(f"Request body: {request.content}")
|
||||
"""
|
||||
|
||||
@api.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Log an HTTPException and the request that triggered it, then return it as JSON."""
    status, detail = exc.status_code, exc.detail
    log_lines = (
        f"HTTP Exception: {status} - {detail}",
        f"Request: {request.method} {request.url}",
    )
    for line in log_lines:
        ERR(line)
    return JSONResponse(content={"detail": detail}, status_code=status)
|
||||
|
||||
@api.middleware("http")
async def handle_exception_middleware(request: Request, call_next):
    """Turn a Content-Length overrun raised downstream into a logged 500 response.

    Any other exception propagates unchanged.
    """
    try:
        return await call_next(request)
    except RuntimeError as exc:
        if str(exc) != "Response content longer than Content-Length":
            raise
        # call_next raised, so no response object exists whose header could be
        # patched; the earlier attempt to do so hit an unbound `response`.
        ERR(f"Response content longer than Content-Length: {request.method} {request.url}")
        return JSONResponse(
            status_code=500,
            content={"detail": "Response content longer than Content-Length"},
        )
|
||||
|
||||
|
||||
|
||||
def load_router(router_name):
    """Import ``sijapi.routers.<router_name>`` and mount its router on ``api``.

    The module is expected to expose an attribute with the same name as the
    module; that attribute is passed to ``api.include_router``. Problems are
    logged rather than raised.

    Args:
        router_name: Module name inside the routers directory. Surrounding
            whitespace is ignored and an empty name is skipped.
    """
    router_name = (router_name or "").strip()
    if not router_name:
        # An unset ROUTERS variable splits into [''], so skip the blank entry
        # instead of reporting a missing ".py" router file.
        DEBUG("Skipping empty router name.")
        return

    router_file = ROUTER_DIR / f'{router_name}.py'
    DEBUG(f"Attempting to load {router_name.capitalize()}...")
    if not router_file.exists():
        ERR(f"Router file for {router_name} does not exist.")
        return

    module_path = f'sijapi.routers.{router_name}'
    try:
        module = importlib.import_module(module_path)
        router = getattr(module, router_name)
        api.include_router(router)
        INFO(f"{router_name.capitalize()} router loaded.")
    except (ImportError, AttributeError) as e:
        CRITICAL(f"Failed to load router {router_name}: {e}")
|
||||
|
||||
def main(argv=None):
    """Load the configured routers and serve the API with Hypercorn.

    Args:
        argv: Unused; command-line arguments are parsed at module import into
            ``args``. It defaults to None so the ``sijapi`` console script,
            which calls ``main()`` without arguments, can start the server.
    """
    if args.test:
        load_router(args.test)
    else:
        CRITICAL("sijapi launched")
        # Log the parsed arguments themselves rather than a bound-method repr.
        CRITICAL(f"{args}")
        for router_name in ROUTERS:
            load_router(router_name)

    config = Config()
    config.keep_alive_timeout = 1200
    config.bind = [HOST]
    asyncio.run(serve(api, config))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv[1:])
|
496
sijapi/config/.env-example
Normal file
496
sijapi/config/.env-example
Normal file
|
@ -0,0 +1,496 @@
|
|||
#──────────────────────────────────────────────────────────────────────────────────
|
||||
# C O N F I G U R A T I O N F I L E
|
||||
#──────────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# Hi friend! You've found my hidden .env-example file. Do you like Zalgo
|
||||
# text and old-school ASCII art? I bet you do. So listen, this'll be your method
|
||||
# for configuring sijapi, and nothing works until you at least:
|
||||
#
|
||||
# (1) fill in the ESSENTIALS category, and
|
||||
#
|
||||
# (2) rename this file `.env`
|
||||
#
|
||||
# ... and even then, certain features will not work until you set other
|
||||
# relevant variables below.
|
||||
#
|
||||
# So get yourself a beverage, put on some sick beats, and settle in for a vibe-y
|
||||
# configuration sesh. Remember to read my detailed notes if you ever feel lost,
|
||||
# and most important, remember:
|
||||
#
|
||||
# † you are NOT alone,
|
||||
# † I love you SO much,
|
||||
# † and you are S̸̢̟̑̒̊ͅō̸͔͕͎̟͜ worthy.
|
||||
#
|
||||
# y o u r b f & b f 4 e ,
|
||||
# .x+=:. . .
|
||||
# z` ^% @88> .. †††>
|
||||
# . <k %8P 888> .d`` %†P
|
||||
# .@8Ned8" . "8P u @8Ne. .u .
|
||||
# .@^%8888" .@88u . us888u. %8888:u@88N .@88u
|
||||
# x88: `)8b. ''888E` u888u. .@88 "8888" `888I 888. ''888E`
|
||||
# ~ 8888N=*8888 888E `'888E 9888 9888 888I 888I 888E
|
||||
# %8" R88 888E 888E 9888 9888 888I 888I 888E
|
||||
# @8Wou 9% 888E 888E 9888 9888 uW888L 888' 888E
|
||||
# .888888P` 888& 888E 9888 9888 '*88888Nu88P 888&
|
||||
# ` ^"F R888" 888E "888*""888" ~ '88888F` R888"
|
||||
# "" 888E ^Y" ^Y' 888 ^ ""
|
||||
# 888E *8E
|
||||
# 888P '8>
|
||||
# .J88" " "
|
||||
#
|
||||
#
|
||||
# B U T I H E A R Y O U :
|
||||
# L E T ' S T A K E I T S L O W A N D
|
||||
# ───────────── S̢͉̺ T̪͔͓ A͇̞ R̘͕͙ T̢̡͉ W͚̻ I͉͇͜ T̟͖̺ H̡͚͙ T̺̞̠ H̢̢̙ E̢̪͓ ──────────────
|
||||
#
|
||||
# ███████╗███████╗███████╗███████╗███╗ ██╗████████╗██╗ █████╗ ██╗ ███████╗
|
||||
# ██╔════╝██╔════╝██╔════╝██╔════╝████╗ ██║╚══██╔══╝██║██╔══██╗██║ ██╔════╝
|
||||
# █████╗ ███████╗███████╗█████╗ ██╔██╗ ██║ ██║ ██║███████║██║ ███████╗
|
||||
# ██╔══╝ ╚════██║╚════██║██╔══╝ ██║╚██╗██║ ██║ ██║██╔══██║██║ ╚════██║
|
||||
# ███████╗███████║███████║███████╗██║ ╚████║ ██║ ██║██║ ██║███████╗███████║
|
||||
# ╚══════╝╚══════╝╚══════╝╚══════╝╚═╝ ╚═══╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝
|
||||
# ─────────────────────────────────────────────────────────────────
|
||||
#
|
||||
#─── first, bind an ip address and port : ──────────────────────────────────────────
|
||||
HOST_NET=0.0.0.0
|
||||
HOST_PORT=4444
|
||||
BASE_URL=http://localhost:4444 # <--- replace with base URL of reverse proxy, etc
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# HOST_NET† and HOST_PORT comprise HOST and determine the ip and port the server binds to.
|
||||
# BASE_URL is used to assemble URLs, e.g. in the MS authentication flow and for serving images generated on the sd router.
|
||||
# BASE_URL should match the base URL used to access sijapi sans endpoint, e.g. http://localhost:4444 or https://api.sij.ai
|
||||
#
|
||||
# † Take care here! Please ensure you understand the implications of setting HOST_NET to anything besides 127.0.0.1, and configure your firewall and router appropriately if you do. Setting HOST_NET to 0.0.0.0, for instance, opens sijapi to any device the server running it is accessible to — including potentially frightening internet randos (depending how your firewall, router, and NAT are configured).
|
||||
#
|
||||
# Here are a few options to consider to more securely enable access from
|
||||
# other devices:
|
||||
#
|
||||
# (1) if all access can occur over Tailscale, either:
|
||||
# (a) leave HOST_NET set to 127.0.0.1, run `tailscale cert $(tailscale
|
||||
# whois $(tailscale ip | head -n 1) | awk '/Name:/ {print $2}')`
|
||||
# if you haven't already issued yourself a TLS certificate on
|
||||
# Tailscale, and then run `tailscale serve --bg --https=4443
|
||||
# 4444` to expose sijapi to your other tailscale-enabled devices
|
||||
# at `https://{device.magicdns-domain.net}:4443`; or
|
||||
# (b) set HOST_NET to your server's Tailscale IP (this should work
|
||||
# but for me doesn't reliably)
|
||||
#
|
||||
# (2) if WAN access truly is required, leave HOST_NET set to 127.0.0.1 and
|
||||
# configure either:
|
||||
# (a) a Cloudflare tunnel, or
|
||||
# (b) a reverse proxy with HTTPS (Caddy is excellent for this).
|
||||
#
|
||||
# And please be sure to set a strong API key either way but especially for (2).
|
||||
# ──────────
|
||||
#
|
||||
#──── configure API key authorization and select exemptions──────────────────begin
|
||||
GLOBAL_API_KEY=¿SECRET? # <--- specify a key to unlock the API
|
||||
PUBLIC_SERVICES=/id,/ip,/health,/img/,/cl/dockets,/cl/search,/cd/alert
|
||||
TRUSTED_SUBNETS=127.0.0.1/32,10.13.37.0/24,100.64.64.0/24
|
||||
#─── notes: ───────────────────────────────────────────────────────────────────end
|
||||
#
|
||||
# GLOBAL_API_KEY determines the API key that will be required to access all endpoints, except access to PUBLIC_SERVICES or from TRUSTED_SUBNETS. Authentication is made via an `Authorization: Bearer {GLOBAL_API_KEY}` header.
|
||||
# TRUSTED_SUBNETS might commonly include 127.0.0.1/32 (localhost), 100.x.x.0/24 (Tailscale tailnet), and/or 192.168.x.0/24 or 10.x.x.0/24 (local network).
|
||||
# When configuring a reverse proxy or Cloudflare tunnel, please verify traffic through it does not appear to sijapi (i.e. in ./logs) as though it were coming from any of the subnets specified here. For sij, using Caddy, it does not, but your setup may differ.
|
||||
# ──────────
|
||||
#
|
||||
#─── router selection: ────────────────────────────────────────────────────────────
|
||||
ROUTERS=asr,calendar,cf,email,health,hooks,llm,locate,note,rag,sd,serve,summarize,time,tts,weather
|
||||
UNLOADED=ig
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# ROUTERS determines which routers are loaded.†
|
||||
#
|
||||
# UNLOADED is not used directly -- it's just there to help keep track which routers are disabled.
|
||||
#
|
||||
# † ┓ ┏ orth bearing in mind: some routers inherently rely on other routers,
|
||||
# ┃┃┃ 3rd party APIs, or other apps being installed locally. If a router is
|
||||
# ┗┻┛ set to load (i.e. is included in ROUTERS) and it depends on another router,
|
||||
# that other router will load too, irrespective of whether it's listed.
|
||||
#
|
||||
# B U T L E T ' S G E T D O W N T O
|
||||
# B R A S S T A C K S , S H A L L W E ?
|
||||
#
|
||||
# asr: requires faster_whisper — $ pip install faster_whisper — and
|
||||
# downloading the model file specified in ASR_DEFAULT_MODEL.
|
||||
#
|
||||
# calendar: requires (1) a Microsoft 365 account with a properly configured
|
||||
# Azure Active Directory app, and/or (2) Calendars on macOS.
|
||||
#
|
||||
# cf: interfaces with the Cloudflare API and Caddy to register new
|
||||
# [sub-]domains on Cloudflare and deploy them with Caddy as
|
||||
# reverse proxy.
|
||||
#
|
||||
# llm: requires ollama — $ pip install ollama — and downloading the
|
||||
# models set in LLM_DEFAULT_MODEL and LLM_VISION_MODEL.
|
||||
#
|
||||
# email: email auto-responders and summarizers to be found here. Designed
|
||||
# for use with IMAP.
|
||||
#
|
||||
# hooks: designed for two specific use cases: monitoring court dockets
|
||||
# through CourtListener.org, and monitoring arbitrary web pages for
|
||||
# changes in tandem with a self-hosted changedetection.io instance.
|
||||
# Both require accounts; other functionality would require
|
||||
# additional / modified code.
|
||||
#
|
||||
# ig: requires an Instagram account, with credentials and other settings
|
||||
# configured separately in the ig_config.json file; relies heavily
|
||||
# on the llm and sd routers which have their own dependencies.
|
||||
#
|
||||
# locate: some endpoints work as is, but the core location tracking
|
||||
# functionality requires Postgresql + PostGIS extension and is
|
||||
# designed specifically to pair with a mobile device where
|
||||
# Pythonista is installed and configured to run the
|
||||
# `gps_tracker.py` and `gps_upload.py` scripts periodically or per
|
||||
# repeating condition (e.g. via automation under Apple Shortcuts).
|
||||
#
|
||||
# note: designed for use with Obsidian plus the Daily Notes and Tasks
|
||||
# core extensions; and the Admonitions, Banners, Icons (with the
|
||||
# Lucide pack), and Make.md community extensions. Moreover `note`
|
||||
# relies heavily on the calendar, llm, locate, sd, summarize, time,
|
||||
# tts, and weather routers and accordingly on the external
|
||||
# dependencies of each.
|
||||
#
|
||||
# sd: requires ComfyUI plus any modules and StableDiffusion models
|
||||
# set in sd_config and individual workflow .json files.
|
||||
#
|
||||
# summarize: relies on the llm router and thus requires ollama.
|
||||
#
|
||||
# time: requires the subscription-based macOS app 'Timing' (one of many
|
||||
# apps that together make SetApp an incredible value for macOS users!)
|
||||
#
|
||||
# tts: designed for use with coqui — $ pip install coqui — and/or the
|
||||
# ElevenLabs API.
|
||||
#
|
||||
# weather: requires a VisualCrossing API key and is designed for (but doesn't
|
||||
# itself strictly require) Postgresql with the PostGIS extension;
|
||||
# (... but it presently relies on the locate router, which does).
|
||||
#
|
||||
#
|
||||
# ... Whew! that was a lot, right? I'm so glad we're in this together...
|
||||
# ──────────
|
||||
#
|
||||
#───────── W H A T A R E Y O U R D I G I T S , H O N E Y B U N ? ────────
|
||||
# LOCALIZATION
|
||||
#─── what are your digits, honey-bun?: ──────────────────────────────────────────────
|
||||
TZ=America/Los_Angeles
|
||||
HOME_ZIP=97401
|
||||
#─── notes: ─────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# ──────────
|
||||
#
|
||||
#─────────────────────── Y ₒ ᵤ ' ᵣ ₑ G ₒ ₙ ₙ ₐ ₗ ₒ ᵥ ₑ ────────────────────────
|
||||
#
|
||||
# ░ ░░ ░░ ░ ░░░░░░░░ ░░░ ░░░ ░░ ░░░░░░░ ░
|
||||
# ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒
|
||||
# ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓ ▓▓▓▓▓▓▓▓ ▓▓ ▓▓▓▓▓▓▓ ▓▓▓▓ ▓ ▓▓▓▓▓▓▓ ▓▓▓
|
||||
# ████ ████ ████ ████ █████████████ █ ████ █ █ ███████ ███████
|
||||
# ████ ████ ████ █ █ ██ ███ ██ ████ █ █ █
|
||||
#
|
||||
# A N D I ' M N O T. E V E N. J E A L O U S.
|
||||
# Y O U D E S E R V E I T A L L , B A B Y C A K E S.
|
||||
#
|
||||
#─── use tailscale for secure remote access: ───────────────────────────────────────
|
||||
TS_IP=100.13.37.5 # <--- enter your own TS IP address
|
||||
TS_SUBNET=100.13.37.0/24 # <--- enter your own TS subnet (IPv4/CIDR)
|
||||
TS_ID=¿SECRET? # <--- enter your own TS device name
|
||||
TS_TAILNET=screaming_sailfin # <--- enter your own TS tailnet / MagicDNS name
|
||||
TAILSCALE_API_KEY=¿SECRET? # <--- enter your own TS API key
|
||||
#─── notes: ────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# TS_IP should match the Tailscale IP of the device. But this is deprecated, and if the functionality becomes relevant again, it should come back in the form of a dynamic check (`tailscale status` in a shell subprocess) in __init__.py or even the /id endpoint.
|
||||
# TS_SUBNET should match the IP/CIDR-format tailnet
|
||||
# TS_ID currently has two roles: it's used to assemble the complete MagicDNS of the server, and it determines what the /id endpoint on the health router returns. This is relevant where multiple servers run the script behind a load balancer (e.g. Caddy), as a means to check which server responds. Bear in mind that /id is NOT API key-protected by default here.
|
||||
# TS_TAILNET should match the tailnet's MagicDNS domain (omitting the `.net`, for reasons)
|
||||
# ──────────
|
||||
#
|
||||
#──────────── ᵁ & ᴹ ᴱ , W E C A N G E T T H R O U G H ────────────────────
|
||||
#
|
||||
# ██▓███ ▒█████ ██████ ▄▄▄█████▓ ▄████ ██▀███ ▓█████ ██████
|
||||
# ▓██░ ██▒██▒ ██▒▒██ ▒ ▓ ██▒ ▓▒ ██▒ ▀█▒▓██ ▒ ██▒▓█ ▀ ▒██ ▒
|
||||
# ▓██░ ██▓▒██░ ██▒░ ▓██▄ ▒ ▓██░ ▒░▒██░▄▄▄░▓██ ░▄█ ▒▒███ ░ ▓██▄
|
||||
# ▒██▄█▓▒ ▒██ ██░ ▒ ██▒░ ▓██▓ ░ ░▓█ ██▓▒██▀▀█▄ ▒▓█ ▄ ▒ ██▒
|
||||
# ▒██▒ ░ ░ ████▓▒░▒██████▒▒ ▒██▒ ░ ░▒▓███▀▒░██▓ ▒██▒░▒████▒▒██████▒▒
|
||||
# ▒██▒ ░ ░ ▒░▒░▒░ ▒ ▒▓▒ ▒ ░ ▒ ░░ ░▒ ▒ ░ ▒▓ ░▒▓░░░ ▒░ ░▒ ▒▓▒ ▒ ░
|
||||
# ▒▓▒░ ░ ▒ ▒░ ░ ░▒ ░ ░ ░ ░ ░ ░▒ ░ ▒░ ░ ░ ░░ ░▒ ░ ░
|
||||
# ░▒ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░░ ░ ░ ░ ░ ░
|
||||
# ░░ ░ ░T̷ O̷ G̷ E̷ T̷ H̷ ░ R̷. ░ ░ ░ ░ ░
|
||||
# J U S T ░
|
||||
#─── frag, or weat,and locate modules:── H O L D M Y H A N D.
|
||||
DB=db
|
||||
#
|
||||
DB_HOST=127.0.0.1
|
||||
DB_PORT=5432
|
||||
# R E A L T I G H T.
|
||||
DB_USER=postgres
|
||||
DB_PASS=¿SECRET? # <--- enter your own Postgres password
|
||||
# Y E A H . . .
|
||||
DB_SSH=100.64.64.15
|
||||
# . . . 𝙹 𝚄 𝚂 𝚃 𝙻 𝙸 𝙺 𝙴 𝚃 𝙷 𝙰 𝚃.
|
||||
DB_SSH_USER=sij
|
||||
DB_SSH_PASS=¿SECRET? # <--- enter SSH password for pg server (if not localhost)
|
||||
#─── notes: ────────────────────────────────────────────────── S E E ? 𝕰 𝖅 - 𝕻 𝖅
|
||||
#
|
||||
# DB, DB_HOST, DB_PORT, DB_USER, and DB_PASS should specify those respective
|
||||
# credentials for your Postgres database. DB_SSH and associated _USER and _PASS
|
||||
# variables allow database access over an SSH tunnel.
|
||||
#
|
||||
# In the current implementation, we rely on Postgres to hold:
|
||||
# i. user-logged location data (locate module), and
|
||||
# ii. results from past weather forecast checks (weather module).
|
||||
#
|
||||
# A future version will hopefully make use of PostGIS's geocoding capabilities,
|
||||
# and add a vector database for the LLM module. Until then it's up to you if the
|
||||
# locate and weather modules are worth the hassle of maintaining Postgres.
|
||||
# ──────────
|
||||
#
|
||||
#─────────────────────────────── 𝐼 𝐵 𝐸 𝑇 𝑌 𝑂 𝑈 ─────────────────────────────────
|
||||
# 𝑅 𝐸 𝐶 𝐸 𝐼 𝑉 𝐸 𝐴 𝐿 𝑂 𝑇 𝑂 𝐹 𝐿 𝑂 𝑉 𝐸 𝐿 𝐸 𝑇 𝑇 𝐸 𝑅 𝑆 𝑂 𝑉 𝐸 𝑅
|
||||
#
|
||||
# .----------------. .----------------. .----------------. .----------------.
|
||||
# | .--------------. | .--------------. | .--------------. | .--------------. |
|
||||
# | | _____ | | | ____ ____ | | | __ | | | ______ | |
|
||||
# | | |_ _| | | ||_ \ / _|| | | / \ | | | |_ __ \ | |
|
||||
# | | | | | | | | \/ | | | | / /\ \ | | | | |__) | | |
|
||||
# | | | | | | | | |\ /| | | | | / ____ \ | | | | ___/ | |
|
||||
# | | _| |_ | | | _| |_\/_| |_ | | | _/ / \ \_ | | | _| |_ | |
|
||||
# | | |_____| | | ||_____||_____|| | ||____| |____|| | | |_____| | |
|
||||
# | | | | | | | | | | | | |
|
||||
# | '--------------' | '--------------' | '--------------' | '--------------' |
|
||||
# '----------------' '----------------' '----------------' '----------------'
|
||||
#
|
||||
# 𝙴 𝙼 𝙰 𝙸 𝙻
|
||||
#
|
||||
#─── imap & smtp: ────────────────────────────────────────────────────────────────────────
|
||||
IMAP_HOST=127.0.0.1
|
||||
IMAP_EMAIL=¿SECRET? # <--- enter yours
|
||||
IMAP_PASSWORD=¿SECRET? # <--- enter yours
|
||||
IMAP_PORT=1142
|
||||
IMAP_ENCRYPTION=STARTTLS
|
||||
SMTP_PORT=1024
|
||||
SMTP_ENCRYPTION=SSL
|
||||
AUTORESPONSE_WHITELIST=¿SECRET? # <--- enter complete/fragmented emails, or keywords
|
||||
AUTORESPONSE_BLACKLIST=¿SECRET? # <--- same deal-io
|
||||
AUTORESPONSE_CONTEXT=¿SECRET? # <--- inform the LLM why it's auto-responding for you
|
||||
USER_FULLNAME=¿SECRET? # <--- more context for the LLM
|
||||
USER_BIO=¿SECRET? # <--- yet more context for the nosy LLM
|
||||
#─── notes: ───────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# This is primarily for summarizing incoming emails. Any IMAP account should work, but
|
||||
# I focused testing on a somewhat complex setup involving Protonmail Bridge.
|
||||
#
|
||||
# ──────────
|
||||
#
|
||||
#
|
||||
#─── ms365 (calendars): ──────────────────────────────────────────────────────────────
|
||||
ICAL_TOGGLE=True
|
||||
ICALENDARS='E68FE085-2ECA-4097-AF0A-8D38C404D8DA,AB5A0473-16DD-4916-BD6D-F12AC2455285'
|
||||
MS365_TOGGLE=False
|
||||
MS365_CLIENT_ID=¿SECRET? # <--- enter your client ID (found in Azure pane)
|
||||
MS365_TENANT_ID=¿SECRET? # <--- enter your tenant ID (found in Azure pane)
|
||||
MS365_SECRET=¿SECRET? # <--- enter your app secret (found in Azure pane)
|
||||
MS365_SCOPE='basic,calendar_all,Calendars.Read,Calendars.ReadWrite,offline_access'
|
||||
MS365_TOKEN_FILE=oauth_token.txt
|
||||
MS365_LOGIN_URL='https://login.microsoftonline.com'
|
||||
MS365_REDIRECT_PATH=¿SECRET? # <--- e.g. http://localhost:4444/o365/oauth_redirect
|
||||
#─── notes: ───────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# MS365_CLIENT_ID, _TENANT_ID, _SECRET, and _SCOPE must be obtained from Microsoft
|
||||
# via the Azure portal, by creating a new app registration and an accompanying secret.
|
||||
# MS365_THUMBPRINT is a vestige of an earlier failed attempt to get this working, and
|
||||
# for now is deprecated. I recommend seeking out a well-reviewed tutorial for
|
||||
# creating an app on Azure with a client_id and secret and necessary scopes for
|
||||
# individual calendar access, because I had one heck of a time trying various approaches.
|
||||
# Do better, Microsoft.
|
||||
#
|
||||
# ──────────
|
||||
#
|
||||
#
|
||||
#──────────────────── L E T ' S G E T S I L L Y , ─────────────────────────────
|
||||
# T H E N G O B͎̝̪̼͉͜ O͖͕͇͚͉̼ N̢̦͖̺͔͎ K̠͓̠͖͜ E̝̼̫̙͔̞ R̡͇͖̙͉͎ S̡͉̠͎͙̪
|
||||
# W I T H O U R O W N
|
||||
#
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓██████▒▓██████▒░
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
# ░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
# ░▒▓████████▓▒ ░▒▓████████▓▒ ░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░
|
||||
#
|
||||
#
|
||||
# ( F O R R E A L T H O U G H , T H E S E A R E
|
||||
#
|
||||
#─── via comfyui (stable diffusion): ─────── S̝͖̦͓̪̻ O̡͖̘̫͇̟ H̢͔͔̫͉͜ O̢̢͉̞͍̘ T̟͍͍̪̦̞ R I G H T N O W
|
||||
LLM_URL=http://localhost:11434
|
||||
SYSTEM_MSG=You are a helpful AI assistant.
|
||||
DEFAULT_LLM=dolphin-mistral
|
||||
DEFAULT_VISION=llava-llama3
|
||||
OPENAI_API_KEY=¿SECRET? # <--- not presently implemented for anything
|
||||
SUMMARY_MODEL=dolphin-mistral
|
||||
SUMMARY_CHUNK_SIZE=4000
|
||||
SUMMARY_CHUNK_OVERLAP=100
|
||||
SUMMARY_TPW=1.3
|
||||
SUMMARY_LENGTH_RATIO=4
|
||||
SUMMARY_MIN_LENGTH=150
|
||||
SUMMARY_TOKEN_LIMIT=4096
|
||||
SUMMARY_INSTRUCT='You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.'
|
||||
SUMMARY_INSTRUCT_TTS='You are an AI assistant that summarizes emails -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary. Your response will undergo Text-To-Speech conversion and added to Sanjays private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following.'
|
||||
DEFAULT_VOICE=joanne
|
||||
WHISPER_CPP_DIR='whisper.cpp'
|
||||
WHISPER_CPP_MODELS=tiny,base,base-en,small,medium,medium-en,large-v3
|
||||
WEBCLIPPER_TTS=elevenlabs
|
||||
EMAIL_SUMMARY_TTS=local
|
||||
YEAR_FMT="%Y"
|
||||
MONTH_FMT="%Y-%m %B"
|
||||
DAY_FMT="%Y-%m-%d %A"
|
||||
DAY_SHORT_FMT="%Y-%m-%d"
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# The exact values here will depend on what software you are using to inference an LLM,
|
||||
# and of course what models and capabilities are available through it. The script was
|
||||
# designed for use with `ollama`, but most of the functionality should be equal with
|
||||
# LM Studio, LocalAI, etc.
|
||||
#
|
||||
# DEFAULT_LLM is self-explanatory; DEFAULT_VISION is used for image recognition within
|
||||
# a multimodal chat context, such as on the ig module for generating intelligible
|
||||
# comments to Instagram posts, or more realistic captions for sd-generated images.
|
||||
#
|
||||
# Note it's possible to specify a separate model for general purposes and for
|
||||
# summarization tasks. The other SUMMARY_ variables call for some explanation,
|
||||
# in particular six that are most relevant when summarizing very long documents:
|
||||
#
|
||||
# SUMMARY_CHUNK_SIZE: determines the maximum length, in tokens, of the pieces that are
|
||||
# split and sent individually to the model.
|
||||
#
|
||||
# SUMMARY_CHUNK_OVERLAP: determines how much of each chunk is overlapped with the prior
|
||||
# and next chunks. Set too high causes repetition, set too low
|
||||
# causes confusion and poor summary results.
|
||||
# The summarization algorithm is flawed but I've gotten the best
|
||||
# results with this set around 100–200.
|
||||
#
|
||||
# SUMMARY_TPW: used in estimating the token count of a prompt for purposes of
|
||||
# complying with the maximum tokens a model can handle at once.
|
||||
# Best you can do is estimate. I tend to use long words fairly
|
||||
# excessively and found my average was 1.3 tokens per word. YMMV.
|
||||
#
|
||||
# SUMMARY_LENGTH_RATIO: this is the primary control over the length of generated
|
||||
# summaries, expressed as the ratio of original text length to
|
||||
# summary length. The default, 4, means the summaries will be
|
||||
# around 1/4 the length of the original text you provide it.
|
||||
#
|
||||
# SUMMARY_MIN_LENGTH: the default SUMMARY_LENGTH_RATIO of 4 isn't ideal for very
|
||||
# short texts, but setting it any lower sacrifices conciseness
|
||||
# in summaries of longer texts. In short one size doesn't fit
|
||||
# all. The compromise I landed on was to set a "maximum minimum"
|
||||
# summary length: under no circumstances will the script impose
|
||||
# a smaller maximum length than this value.
|
||||
#
|
||||
# SUMMARY_INSTRUCT: sets the prompt used when summarizing text.
|
||||
#
|
||||
# SUMMARY_INSTRUCT_TTS: sets a separate prompt for use when summarizing text where
|
||||
# tts output was requested; tends to yield "cleaner" audio
|
||||
# with fewer numbers (page numbers, citations) and other
|
||||
# information extraneous to spoken contexts.
|
||||
#
|
||||
# DEFAULT_VOICE: used for all tts tasks when a specific voice is not requested.
|
||||
#
|
||||
# ──────────
|
||||
#
|
||||
#
|
||||
#────,-_/────────── W E C A N E X P E R I M E N T W I T H ──────────.───────────
|
||||
# ' | ,~-,-. ,-. ,-. ,--. | --' ,--. ,-. ,--. ,-. ,-. |-- . ,-. ,-.
|
||||
# .^ | | | | ,--| | | | --' | -,- | --' | | | --' | ,--| | | | | | |
|
||||
# `--' ' ' ' `-^ `-| `--' `---| `--' ' ' `--' ' `--^ `' ` `-' ' '
|
||||
# , | ,-. | ~ 𝙸 𝙽 𝚃 𝙷 𝙴 𝙽 𝚄 𝙳 𝙴 . ~
|
||||
# `~~' `-+'
|
||||
# O R F U L L Y C L O T H E D ── U P T O Y O U
|
||||
#
|
||||
#─── via comfyui (stable diffusion): ───── ( B U T L E T M E K N O W , Y E A H ? )
|
||||
COMFYUI_URL=http://localhost:8188
|
||||
COMFYUI_DIR=/Users/sij/workshop/ComfyUI
|
||||
COMFYUI_LAUNCH_CMD="mamba activate comfyui && python main.py"
|
||||
OBSIDIAN_BANNER_SCENE=wallpaper
|
||||
PHOTOPRISM_USER=NOT_IMPLEMENTED
|
||||
PHOTOPRISM_PASS=NOT_IMPLEMENTED
|
||||
ANONYMIZED_TELEMETRY=False
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# COMFYUI_URL, as you may expect, should point to the URL you use to access ComfyUI. If you
|
||||
# don't know, watch for it in the server logs once ComfyUI is fully launched.
|
||||
#
|
||||
# COMFYUI_DIR, with similar self-evidence, should point to the base directory of your
|
||||
# ComfyUI installation (i.e. the folder that contains `models`, `inputs`, and `outputs`).
|
||||
# It can handle either a
|
||||
#
|
||||
# PhotoPrism integration is not yet implemented, so don't bother with that just yet.
|
||||
# ──────────
|
||||
#
|
||||
# D O N ' T M I S S O N E ───────────────────────────────────────
|
||||
#\ F I N A L S M A T T E R I N G O F Ⓜ Ⓘ Ⓢ Ⓒ Ⓔ Ⓛ Ⓛ Ⓐ Ⓝ Ⓨ \
|
||||
# \ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
|
||||
# \ _ _ _/\\\\_ _ _ _ _ _ /\\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
|
||||
# \ _ _ \/\\\\\\_ _ _ _ /\\\\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
|
||||
# \ _ _ \/\\\//\\\_ _ /\\\//\\\ _ _/\\\ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
|
||||
# \ _ _ \/\\\\///\\\/\\\/ \/\\\ _ _///_ _ _/\\\\\\\\\\_ _ _ _/\\\\\\\\_ _\
|
||||
# \ _ _ \/\\\ _\///\\\/ _ \/\\\ _ _/\\\ _ \/\\\////// _ _ _/\\\////// _ _\
|
||||
# \ _ _ \/\\\ _ _\/// _ _ \/\\\ _ _/\\\ _ \/\\\\\\\\\\_ _ /\\\_ _ _ _ _ _\
|
||||
# \ _ _ \/\\\ _ _ _ _ _ _ \/\\\ _ _/\\\ _ \////////\\\_ _\//\\\ _ _ _ _ _\
|
||||
# \ _ _ \/\\\ _ _ _ _ _ _ \/\\\ _ _/\\\ _ _/\\\\\\\\\\_ _ \///\\\\\\\\_ _\
|
||||
# \ _ _ \///_ _ _ _ _ _ _ \///_ _ _///_ _ \////////// _ _ _ \//////// _ _\
|
||||
# \ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\
|
||||
# ─────────────────── A N D O T H E R W H A T - H A V E - Y O U S ──
|
||||
#
|
||||
#─── other needful API keys, mainly: ────────────────────────────────────────────────────
|
||||
CF_API_BASE_URL=¿SECRET? # <--- Cloudflare API URL
|
||||
CF_TOKEN=¿SECRET? # <--- Cloudflare Token
|
||||
VISUALCROSSING_BASE_URL='https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline'
|
||||
VISUALCROSSING_API_KEY=¿SECRET? # <--- VisualCrossing API key (for Weather)
|
||||
ELEVENLABS_API_KEY=¿SECRET? # <--- ElevenLabs API key (for TTS)
|
||||
COURTLISTENER_BASE_URL='https://www.courtlistener.com'
|
||||
COURTLISTENER_API_KEY=¿SECRET? # <--- CourtListener API key (for court docket entries)
|
||||
TIMING_API_URL='https://web.timingapp.com/api/v1'
|
||||
TIMING_API_KEY=¿SECRET? # <--- API key for macOS/web app Timing (time tracking)
|
||||
PUBLIC_KEY_FILE=sij.asc # <--- public PGP key (served at /pgp)
|
||||
MAC_ID=¿SECRET? # <--- Tailscale hostname for primary macOS (alerts)
|
||||
MAC_UN=¿SECRET? # <--- Primary macOS username
|
||||
MAC_PW=¿SECRET? # <--- Primary macOS password
|
||||
#─── notes: ──────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
#
|
||||
# CF_TOKEN: a Cloudflare token. This is used on the cf router for quick
|
||||
# deployment of new domains in tandem with Caddy and for ddns.
|
||||
#
|
||||
# VISUALCROSSING_API_KEY: used for obtaining weather forecasts. It is a very data-rich
|
||||
# yet affordable source of weather info, with a generous free
|
||||
# plan.
|
||||
#
|
||||
# ELEVENLABS_API_KEY: used when on the tts router if tts tasks are outsourced to
|
||||
# the state-of-the-art models at ElevenLabs.
|
||||
#
|
||||
# COURTLISTENER_API_KEY: used primarily on the hooks router, but likely relevant only
|
||||
# to legal professionals that will be aware what it is for.
|
||||
#
|
||||
# TIMING_API_URL: are used on the time router for generating various tasks
|
||||
# & related to timekeeping, as well as on the notes router for
|
||||
# TIMING_API_KEY: generating markdown-formatted timeslips. It requires an
|
||||
# active subscription to the Timing app (macOS or web), but
|
||||
# it's worth noting comes included in the SetApp subscription
|
||||
# bundle, for the same price, last I checked, as subscribing to
|
||||
# Timing alone. If you have a Mac and somehow don't know this
|
||||
# already, SetApp is an utterly insane value. I pay $15/mo for
|
||||
# apps that I would otherwise pay ~$100/mo for if subscribing
|
||||
# individually. I want to say I wasn't paid to say this, but
|
||||
# with those savings I almost feel like I was.
|
||||
#
|
||||
# MAC_ID: These last three variables are for a specific use case where
|
||||
# MAC_UN: you want certain commands run, or alerts appearing, on a
|
||||
# MAC_PW: designated macOS computer. The alerts router is designed to
|
||||
# deliver OS-level notifications to the specified Mac when a
|
||||
# webhook gets a hit on specified keywords within the payload.
|
||||
# Setting the MAC_ID to the TS_ID of the target Mac, allows
|
||||
# the script to readily know whether it itself is the target
|
||||
# (this is relevant in a load-balancing context), and how to
|
||||
# reach the target if not — to wit, ssh using MagicDNS.
|
||||
|
88
sijapi/logs.py
Normal file
88
sijapi/logs.py
Normal file
|
@ -0,0 +1,88 @@
|
|||
import os
|
||||
import sys
|
||||
import logging
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from colorama import Fore, Back, Style, init as colorama_init
|
||||
import traceback
|
||||
|
||||
# Force colorama to initialize for the current platform
|
||||
colorama_init(autoreset=True, strip=False, convert=True)
|
||||
|
||||
class ColorFormatter(logging.Formatter):
    """Formatter that wraps each rendered record in a per-level colour code."""

    # Colour prefix for each standard level; levels not listed get no prefix.
    COLOR_MAP = {
        logging.DEBUG: Fore.CYAN,
        logging.INFO: Fore.GREEN,
        logging.WARNING: Fore.YELLOW,
        logging.ERROR: Fore.RED,
        logging.CRITICAL: Fore.MAGENTA + Back.WHITE,
    }

    def format(self, record):
        """Return the normally formatted record, colour-prefixed and reset-terminated."""
        rendered = super().format(record)
        prefix = self.COLOR_MAP.get(record.levelno, '')
        return "".join((prefix, rendered, Style.RESET_ALL))
|
||||
|
||||
class Logger:
    """Configure a named ``logging`` logger with a rotating file and a coloured console.

    Args:
        name: Name passed to ``logging.getLogger``.
        logs_dir: Directory that will hold ``app.log``; created on setup.
    """

    def __init__(self, name, logs_dir):
        self.logs_dir = logs_dir
        self.logger = logging.getLogger(name)
        # The logger itself lets everything through; the handlers filter.
        self.logger.setLevel(logging.DEBUG)

    def setup_from_args(self, args):
        """Attach the file and console handlers, then emit one record per level.

        The file handler always records DEBUG and above. The console handler
        records DEBUG and above when ``args.debug`` is truthy, otherwise INFO
        and above.

        Args:
            args: Object exposing a boolean-like ``debug`` attribute.
        """
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.logs_dir, exist_ok=True)

        # logging.getLogger() returns the same object for a given name, so a
        # repeated setup would stack a second pair of handlers and every
        # record would be written twice. Drop the pair installed earlier.
        previous = [h for h in self.logger.handlers
                    if getattr(h, "_from_setup_from_args", False)]
        for stale in previous:
            self.logger.removeHandler(stale)
            stale.close()

        # File handler
        handler_path = os.path.join(self.logs_dir, 'app.log')
        file_handler = RotatingFileHandler(handler_path, maxBytes=2000000, backupCount=10)
        file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        file_handler.setLevel(logging.DEBUG)

        # Console handler
        console_handler = logging.StreamHandler(sys.stdout)  # Explicitly use sys.stdout
        console_formatter = ColorFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        console_handler.setFormatter(console_formatter)

        # Set console handler level based on args
        console_handler.setLevel(logging.DEBUG if args.debug else logging.INFO)

        # Add handlers to logger, tagged so a later setup call can replace them.
        for handler in (file_handler, console_handler):
            handler._from_setup_from_args = True
            self.logger.addHandler(handler)

        # Test color output: one record per level so the colour mapping can be
        # checked by eye. These records also reach app.log.
        self.logger.debug("Debug message (should be Cyan)")
        self.logger.info("Info message (should be Green)")
        self.logger.warning("Warning message (should be Yellow)")
        self.logger.error("Error message (should be Red)")
        self.logger.critical("Critical message (should be Magenta on White)")

    def get_logger(self):
        """Return the underlying ``logging.Logger``."""
        return self.logger
|
||||
|
||||
# Add this at the end of the file for testing
|
||||
if __name__ == "__main__":
    import argparse

    # A single --debug switch is all the demo needs.
    cli = argparse.ArgumentParser()
    cli.add_argument('--debug', action='store_true')
    args = cli.parse_args()

    demo = Logger("test", "logs")
    demo.setup_from_args(args)
    test_logger = demo.get_logger()

    # Show the environment variables that commonly affect colour output.
    for env_name in ("FORCE_COLOR", "NO_COLOR", "TERM", "PYCHARM_HOSTED", "PYTHONIOENCODING"):
        print(f"{env_name}:", os.environ.get(env_name))

    # One record per level, lowest severity first.
    test_logger.debug("This is a debug message")
    test_logger.info("This is an info message")
    test_logger.warning("This is a warning message")
    test_logger.error("This is an error message")
    test_logger.critical("This is a critical message")
|
165
sijapi/routers/asr.py
Normal file
165
sijapi/routers/asr.py
Normal file
|
@ -0,0 +1,165 @@
|
|||
'''
|
||||
Automatic Speech Recognition module relying on the `whisper_cpp` implementation of OpenAI's Whisper model.
|
||||
Depends on:
|
||||
LOGGER, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR
|
||||
Notes:
|
||||
Performs exceptionally well on Apple Silicon. Other devices will benefit from future updates to optionally use `faster_whisper`, `insanely_faster_whisper`, and/or `whisper_jax`.
|
||||
'''
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Form, UploadFile, File
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional
|
||||
import tempfile
|
||||
from fastapi.responses import JSONResponse, FileResponse
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
from whisperplus.pipelines import mlx_whisper
|
||||
from youtube_dl import YoutubeDL
|
||||
from urllib.parse import unquote
|
||||
import subprocess
|
||||
import os
|
||||
import uuid
|
||||
from threading import Thread
|
||||
import multiprocessing
|
||||
import asyncio
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR, MAX_CPU_CORES
|
||||
|
||||
|
||||
asr = APIRouter()
|
||||
|
||||
class TranscribeParams(BaseModel):
    """Options for a transcription request; each set option becomes a whisper.cpp CLI flag in ``transcribe_audio``."""
    model: str = Field(default="small")                       # falls back to 'small' if not in WHISPER_CPP_MODELS
    output_srt : Optional[bool] = Field(default=False)        # -osrt
    language : Optional[str] = Field(None)                    # -l <language>
    split_on_word : Optional[bool] = Field(default=False)     # -sow
    temperature : Optional[float] = Field(default=0)          # -tp, only passed when > 0
    # -tpi is a fractional temperature step in whisper.cpp, so an int-only
    # field would reject the values it is meant to carry; ints are still accepted.
    temp_increment : Optional[float] = Field(None)
    translate : Optional[bool] = Field(default=False)         # -tr
    diarize : Optional[bool] = Field(default=False)           # -di
    tiny_diarize : Optional[bool] = Field(default=False)      # -tdrz
    no_fallback : Optional[bool] = Field(default=False)       # -nf
    output_json : Optional[bool] = Field(default=False)       # -oj (ignored when output_srt is set)
    detect_language : Optional[bool] = Field(default=False)   # -dl (ignored when language is set)
    dtw : Optional[str] = Field(None)                         # --dtw <value>
    threads : Optional[int] = Field(None)                     # -t, clamped to 1..MAX_CPU_CORES
|
||||
|
||||
from urllib.parse import unquote
|
||||
import json
|
||||
|
||||
@asr.post("/asr")
@asr.post("/transcribe")
@asr.post("/v1/audio/transcription")
async def transcribe_endpoint(
    file: UploadFile = File(...),
    params: str = Form(...)
):
    """Transcribe an uploaded audio file.

    Args:
        file: The uploaded audio.
        params: URL-encoded JSON string whose keys match ``TranscribeParams``.

    Returns:
        Whatever ``transcribe_audio`` returns for the upload.

    Raises:
        HTTPException: 400 when ``params`` is not valid JSON or fails validation.
    """
    try:
        # Decode the URL-encoded string, parse the JSON, then validate it.
        parameters_dict = json.loads(unquote(params))
        parameters = TranscribeParams(**parameters_dict)
    except json.JSONDecodeError as json_err:
        raise HTTPException(status_code=400, detail=f"Invalid JSON: {str(json_err)}")
    except Exception as err:
        raise HTTPException(status_code=400, detail=f"Error parsing parameters: {str(err)}")

    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(await file.read())
        temp_file_path = temp_file.name

    try:
        return await transcribe_audio(file_path=temp_file_path, params=parameters)
    finally:
        # delete=False means nothing else removes the raw upload; transcribe_audio
        # works on its own converted copy, so the upload can go once it returns.
        try:
            os.remove(temp_file_path)
        except OSError:
            pass
|
||||
|
||||
async def transcribe_audio(file_path, params: TranscribeParams):
    """Convert *file_path* to WAV and run the whisper.cpp binary on it.

    Args:
        file_path: Path of the audio file to transcribe.
        params: Options mapped onto whisper.cpp command-line flags.

    Returns:
        The stripped stdout of the whisper.cpp process.

    Raises:
        Exception: If the process exits with a non-zero return code.
    """
    # convert_to_wav shells out to ffmpeg synchronously; run it in the default
    # executor so the event loop is not blocked for the duration.
    loop = asyncio.get_running_loop()
    file_path = await loop.run_in_executor(None, convert_to_wav, file_path)

    model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
    model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'
    command = [str(WHISPER_CPP_DIR / 'build' / 'bin' / 'main')]
    command.extend(['-m', str(model_path)])
    command.extend(['-t', str(max(1, min(params.threads or MAX_CPU_CORES, MAX_CPU_CORES)))])
    command.extend(['-np'])  # Always enable no-prints

    if params.split_on_word:
        command.append('-sow')
    # temperature is Optional, so an explicit null must not reach the comparison.
    if params.temperature is not None and params.temperature > 0:
        command.extend(['-tp', str(params.temperature)])
    if params.temp_increment:
        command.extend(['-tpi', str(params.temp_increment)])
    if params.language:
        command.extend(['-l', params.language])
    elif params.detect_language:
        command.append('-dl')
    if params.translate:
        command.append('-tr')
    if params.diarize:
        command.append('-di')
    if params.tiny_diarize:
        command.append('-tdrz')
    if params.no_fallback:
        command.append('-nf')
    if params.output_srt:
        command.append('-osrt')
    elif params.output_json:
        command.append('-oj')
    else:
        command.append('-nt')
    if params.dtw:
        command.extend(['--dtw', params.dtw])

    command.extend(['-f', file_path])

    DEBUG(f"Command: {command}")
    proc = await asyncio.create_subprocess_exec(
        *command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )
    stdout, stderr = await proc.communicate()

    if proc.returncode != 0:
        raise Exception(f"Error running command: {stderr.decode()}")

    result = stdout.decode().strip()
    DEBUG(f"Result: {result}")
    return result
|
||||
|
||||
|
||||
def convert_to_wav(file_path: str):
    """Transcode *file_path* with ffmpeg into a new WAV file under ASR_DIR and return its path.

    The output is 16-bit PCM, 16 kHz, mono. Raises ``subprocess.CalledProcessError``
    if ffmpeg exits with a non-zero status.
    """
    target = os.path.join(ASR_DIR, f"{uuid.uuid4()}.wav")
    ffmpeg_cmd = [
        "ffmpeg", "-y",
        "-i", file_path,
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        target,
    ]
    subprocess.run(ffmpeg_cmd, check=True)
    return target
|
||||
def download_from_youtube(url: str):
    """Download the audio of *url* as MP3 into ASR_DIR, then return the path of its WAV conversion."""
    mp3_path = os.path.join(ASR_DIR, f"{uuid.uuid4()}.mp3")
    extract_audio = {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '192'}
    options = {
        'outtmpl': mp3_path,
        'postprocessors': [extract_audio],
        'nooverwrites': True
    }
    with YoutubeDL(options) as downloader:
        downloader.download([url])
    return convert_to_wav(mp3_path)
|
||||
|
||||
def format_srt_timestamp(seconds: float):
    """Render a duration in seconds as an SRT timestamp, ``HH:MM:SS,mmm``."""
    total_ms = round(seconds * 1000.0)
    # Peel off hours, minutes and seconds; what remains is the millisecond part.
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, millis = divmod(remainder, 1_000)
    return f"{hours:02}:{minutes:02}:{whole_seconds:02},{millis:03}"
|
||||
|
||||
def write_srt(segments: list, output_file: str):
    """Write *segments* to *output_file* in SRT format.

    Args:
        segments: Dicts with ``'start'`` and ``'end'`` (seconds) and ``'text'``.
        output_file: Destination path; overwritten if it exists.
    """
    # Transcripts routinely contain non-ASCII text, so do not depend on the
    # platform's default encoding.
    with open(output_file, 'w', encoding='utf-8') as f:
        for i, segment in enumerate(segments, start=1):
            start = format_srt_timestamp(segment['start'])
            end = format_srt_timestamp(segment['end'])
            text = segment['text']
            f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
|
209
sijapi/routers/cf.py
Normal file
209
sijapi/routers/cf.py
Normal file
|
@ -0,0 +1,209 @@
|
|||
'''
|
||||
IN DEVELOPMENT - Cloudflare + Caddy module. Based on a bash script that's able to rapidly deploy new Cloudflare subdomains on new Caddy reverse proxy configurations, managing everything including restarting Caddy. The Python version needs more testing before actual use.
|
||||
'''
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from fastapi.responses import PlainTextResponse, JSONResponse
|
||||
from typing import Optional
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP
|
||||
import httpx
|
||||
import asyncio
|
||||
from asyncio import sleep
|
||||
import os
|
||||
|
||||
cf = APIRouter()
|
||||
|
||||
class DNSRecordRequest(BaseModel):
    """Request body for ``/cf/add_config``."""
    # Fully qualified host name, e.g. "app.example.com".
    full_domain: str
    # Upstream address for the reverse proxy; add_config uses "localhost" when omitted.
    ip: Optional[str] = None
    # Upstream port for the reverse proxy.
    port: str
|
||||
|
||||
|
||||
# Update to make get_zone_id async
|
||||
async def get_zone_id(domain: str) -> str:
    """Return the Cloudflare zone ID of the first zone whose name matches *domain*.

    Raises:
        ValueError: If the API reports failure or no zone matches.
        httpx.HTTPStatusError: If the HTTP response has an error status.
    """
    auth_headers = {
        "Authorization": f"Bearer {CF_TOKEN}",
        "Content-Type": "application/json"
    }
    async with httpx.AsyncClient() as client:
        response = await client.get(
            f"{CF_API_BASE_URL}/zones",
            headers=auth_headers,
            params={"name": domain},
        )
        response.raise_for_status()
        payload = response.json()

    if not payload['success']:
        errors = ', '.join(err['message'] for err in payload['errors'])
        raise ValueError(f"Cloudflare API returned errors: {errors}")

    if len(payload['result']) > 0:
        return payload['result'][0]['id']
    raise ValueError(f"No Zone ID found for domain '{domain}'")
|
||||
|
||||
|
||||
|
||||
async def update_caddyfile(full_domain, caddy_ip, port):
    """Append a reverse-proxy site block for *full_domain* to the Caddyfile and restart Caddy.

    Args:
        full_domain: Site address for the new block.
        caddy_ip: Upstream host for ``reverse_proxy``.
        port: Upstream port for ``reverse_proxy``.
    """
    # Caddy substitutes environment variables written as {$NAME}; the previous
    # quoted form {"$NAME"} is not a Caddyfile placeholder.
    # NOTE(review): full_domain, caddy_ip and port come from the request body and
    # are written verbatim - validate them upstream before exposing this endpoint.
    caddy_config = f"""
{full_domain} {{
reverse_proxy {caddy_ip}:{port}
tls {{
dns cloudflare {{$CLOUDFLARE_API_TOKEN}}
}}
}}
"""
    with open(CADDYFILE_PATH, 'a') as file:
        file.write(caddy_config)

    # Using asyncio to create subprocess
    proc = await asyncio.create_subprocess_exec("sudo", "systemctl", "restart", "caddy")
    await proc.communicate()
    if proc.returncode != 0:
        # Surface a failed restart instead of silently reporting success.
        ERR(f"Restarting caddy failed with exit code {proc.returncode}")
|
||||
|
||||
|
||||
# Retry mechanism for API calls
|
||||
async def retry_request(url, headers, max_retries=5, backoff_factor=1):
    """GET *url*, retrying with exponential backoff on HTTP or transport errors.

    Args:
        url: Address to request.
        headers: Headers sent with every attempt.
        max_retries: Maximum number of attempts.
        backoff_factor: Base delay in seconds; attempt ``k`` (0-based) waits
            ``backoff_factor * 2**k`` before the next one.

    Returns:
        The first response with a successful status.

    Raises:
        HTTPException: 500 once every attempt has failed.
    """
    # One client for all attempts so the connection pool can be reused.
    async with httpx.AsyncClient(timeout=10.0) as client:
        for retry in range(max_retries):
            try:
                response = await client.get(url, headers=headers)
                response.raise_for_status()
                return response
            # httpx.ConnectTimeout is a subclass of httpx.HTTPError.
            except httpx.HTTPError as e:
                ERR(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
                # Do not sleep after the last attempt; nothing follows it.
                if retry + 1 < max_retries:
                    await sleep(backoff_factor * (2 ** retry))
    raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request")
|
||||
|
||||
# Helper function to load Caddyfile domains
|
||||
def load_caddyfile_domains():
    """Return the site addresses that open a top-level block in the Caddyfile.

    Only lines at brace depth 0 that end with ``{`` count as site blocks, so
    nested directive blocks (``tls {``), the global options block and
    ``(snippet)`` definitions are not reported as domains. A line listing several
    addresses, separated by commas or spaces, yields one entry per address.
    """
    with open(CADDYFILE_PATH, 'r') as file:
        caddyfile_content = file.read()

    domains = []
    depth = 0  # current brace nesting level
    for raw_line in caddyfile_content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if depth == 0 and line.endswith('{'):
            addresses = line[:-1].replace(',', ' ').split()
            domains.extend(addr for addr in addresses if not addr.startswith('('))
        # Placeholders such as {$VAR} open and close on the same line, so they net to zero.
        depth = max(0, depth + line.count('{') - line.count('}'))
    return domains
|
||||
|
||||
# Endpoint to add new configuration to Cloudflare, Caddyfile, and cf_domains.json
|
||||
@cf.post("/cf/add_config")
async def add_config(record: DNSRecordRequest):
    """Create a proxied Cloudflare A record for the requested host and add a matching Caddy site block.

    Raises:
        HTTPException: 400 when the zone cannot be resolved or Cloudflare
            rejects the record.
    """
    full_domain = record.full_domain
    caddy_ip = record.ip or "localhost"
    port = record.port

    # Extract subdomain and domain. A two-label name is treated as the zone apex.
    parts = full_domain.split(".")
    if len(parts) == 2:
        domain = full_domain
        subdomain = "@"
    else:
        subdomain = parts[0]
        domain = ".".join(parts[1:])

    # get_zone_id reports "not found" by raising ValueError; convert it into a
    # client error instead of letting it surface as a 500.
    try:
        zone_id = await get_zone_id(domain)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=f"Zone ID for {domain} could not be found: {err}") from err
    if not zone_id:
        raise HTTPException(status_code=400, detail=f"Zone ID for {domain} could not be found")

    # API call setup for Cloudflare A record
    endpoint = f"{CF_API_BASE_URL}/zones/{zone_id}/dns_records"
    headers = {
        "Authorization": f"Bearer {CF_TOKEN}",
        "Content-Type": "application/json"
    }
    data = {
        "type": "A",
        "name": subdomain,
        "content": CF_IP,
        "ttl": 120,
        "proxied": True
    }

    async with httpx.AsyncClient() as client:
        response = await client.post(endpoint, headers=headers, json=data)

    result = response.json()

    if not result.get("success", False):
        # "errors" may be missing or an empty list; fall back to placeholders.
        first_error = (result.get("errors") or [{}])[0]
        error_message = first_error.get("message", "Unknown error")
        error_code = first_error.get("code", "Unknown code")
        raise HTTPException(status_code=400, detail=f"Failed to create A record: {error_message} (Code: {error_code})")

    # Update Caddyfile
    await update_caddyfile(full_domain, caddy_ip, port)

    return {"message": "Configuration added successfully"}
|
||||
|
||||
|
||||
|
||||
@cf.get("/cf/list_zones")
async def list_zones_endpoint():
    """Return the zone/record mapping built by ``list_zones`` as JSON."""
    return JSONResponse(await list_zones())
|
||||
|
||||
async def list_zones():
    """Fetch zones and their DNS records from Cloudflare.

    Returns:
        A dict keyed by zone name. Each value holds ``"zone_id"`` plus one entry
        per DNS record, keyed by the record name with the zone suffix removed,
        of the form ``{"dns_id": <record id>}``.

    Raises:
        HTTPException: 400 when Cloudflare reports failure.
    """
    endpoint = f"{CF_API_BASE_URL}/zones"
    headers = {
        "Authorization": f"Bearer {CF_TOKEN}",
        "Content-Type": "application/json"
    }
    domains = {}

    # One client for the zone listing and every per-zone lookup.
    # NOTE(review): no pagination parameters are sent; confirm the API's default
    # page size covers all zones and records.
    async with httpx.AsyncClient() as client:
        response = await client.get(endpoint, headers=headers)
        response.raise_for_status()
        result = response.json()

        if not result.get("success"):
            raise HTTPException(status_code=400, detail="Failed to retrieve zones from Cloudflare")

        for zone in result.get("result", []):
            zone_id = zone.get("id")
            zone_name = zone.get("name")
            domains[zone_name] = {"zone_id": zone_id}

            records_endpoint = f"{CF_API_BASE_URL}/zones/{zone_id}/dns_records"
            records_response = await client.get(records_endpoint, headers=headers)
            records_result = records_response.json()

            if not records_result.get("success"):
                raise HTTPException(status_code=400, detail=f"Failed to retrieve DNS records for zone {zone_name}")

            suffix = f".{zone_name}"
            for record in records_result.get("result", []):
                record_name = record.get("name")
                if not record_name:
                    continue
                # Strip the zone only as a trailing suffix; str.replace would
                # also remove it from the middle of a name.
                if record_name.endswith(suffix):
                    domain_name = record_name[:-len(suffix)]
                else:
                    domain_name = record_name
                domains[zone_name].setdefault(domain_name, {})["dns_id"] = record.get("id")

    return domains
|
||||
|
||||
@cf.get("/cf/compare_caddy", response_class=PlainTextResponse)
async def crossreference_caddyfile():
    """Report, as Markdown, which domains appear only in the Caddyfile or only in Cloudflare."""
    cf_domains_data = await list_zones()
    caddyfile_domains = load_caddyfile_domains()

    cf_domains_set = set()
    for domain, data in cf_domains_data.items():
        # list_zones stores record names directly beside "zone_id"; a nested
        # "subdomains" mapping is still honoured if one is present.
        subdomains = data.get("subdomains")
        if subdomains is None:
            subdomains = [key for key in data if key != "zone_id"]
        for sub in subdomains:
            # The apex record appears either as "@" or as the zone name itself.
            cf_domains_set.add(domain if sub in ("@", domain) else f"{sub}.{domain}")

    caddyfile_domains_set = set(caddyfile_domains)

    only_in_caddyfile = caddyfile_domains_set - cf_domains_set
    only_in_cf_domains = cf_domains_set - caddyfile_domains_set

    # Sorted so the report is stable between calls.
    lines = ["# Cross-reference cf_domains.json and Caddyfile\n\n"]
    lines.append("## Domains only in Caddyfile:\n\n")
    lines.extend(f"- **{domain}**\n" for domain in sorted(only_in_caddyfile))
    lines.append("\n## Domains only in cf_domains.json:\n\n")
    lines.extend(f"- **{domain}**\n" for domain in sorted(only_in_cf_domains))

    return "".join(lines)
|
253
sijapi/routers/email.py
Normal file
253
sijapi/routers/email.py
Normal file
|
@ -0,0 +1,253 @@
|
|||
'''
|
||||
IN DEVELOPMENT Email module. Uses IMAP and SMTP login credentials to monitor an inbox and summarize incoming emails that match certain criteria and save the Text-To-Speech converted summaries into a specified "podcast" folder.
|
||||
UNIMPLEMENTED: AI auto-responder.
|
||||
'''
|
||||
from fastapi import APIRouter
|
||||
import asyncio
|
||||
from imbox import Imbox
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
from pathlib import Path
|
||||
from shutil import move
|
||||
import tempfile
|
||||
import re
|
||||
import ssl
|
||||
from smtplib import SMTP_SSL
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from datetime import datetime as dt_datetime
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional, Any
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import HOME_DIR, DATA_DIR, OBSIDIAN_VAULT_DIR, PODCAST_DIR, IMAP, OBSIDIAN_JOURNAL_DIR, DEFAULT_VOICE, AUTORESPONSE_BLACKLIST, AUTORESPONSE_WHITELIST, AUTORESPONSE_CONTEXT, USER_FULLNAME, USER_BIO, AUTORESPOND, TZ
|
||||
from sijapi.routers import summarize, tts, llm
|
||||
from sijapi.utilities import clean_text, assemble_journal_path, localize_dt, extract_text, prefix_lines
|
||||
|
||||
|
||||
email = APIRouter(tags=["private"])
|
||||
|
||||
|
||||
class Contact(BaseModel):
    """An email correspondent: an address and a display name."""
    email: str  # address
    name: str   # display name; process_unread_emails passes '' when the message has none
|
||||
class EmailModel(BaseModel):
    """A received email as handled by this module."""
    sender: str                              # sender's address
    recipients: List[Contact]                # everyone the message was sent to
    datetime_received: dt_datetime           # message date, localized by the caller
    subject: str
    body: str                                # cleaned text body
    attachments: Optional[List[Any]] = None  # raw attachment objects from the IMAP library
|
||||
|
||||
def imap_conn():
    """Open an ``Imbox`` connection using the settings on the ``IMAP`` config object."""
    encryption = IMAP.imap_encryption
    return Imbox(
        IMAP.host,
        username=IMAP.email,
        password=IMAP.password,
        port=IMAP.imap_port,
        ssl=(encryption == 'SSL'),
        starttls=(encryption == 'STARTTLS'),
    )
|
||||
|
||||
|
||||
def clean_email_content(html_content):
    """Strip markup from *html_content* and collapse whitespace runs into single spaces."""
    text_only = BeautifulSoup(html_content, "html.parser").get_text()
    collapsed = re.sub(r'[ \t\r\n]+', ' ', text_only)
    return collapsed.strip()
|
||||
|
||||
|
||||
async def extract_attachments(attachments) -> List[str]:
    """Extract text from each attachment via a temporary file and return the texts in order."""
    texts = []
    for item in attachments:
        name = item.get('filename', 'tempfile.txt')
        # Keep the original extension (lower-cased); use .txt when there is none.
        suffix = os.path.splitext(name)[1].lower() or '.txt'

        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
            handle.write(item['content'].getvalue())
            scratch_path = handle.name

        try:
            texts.append(await extract_text(scratch_path))
        finally:
            # The temp file was created with delete=False, so remove it here.
            if os.path.exists(scratch_path):
                os.remove(scratch_path)

    return texts
|
||||
|
||||
|
||||
def _extract_email_body(message) -> str:
    '''Return the cleaned body of a message: the HTML part if present, else plain text, else "".'''
    for part in ('html', 'plain'):
        chunks = message.body.get(part)
        if chunks:
            return clean_email_content(chunks[0])
    return ""


async def _handle_unread_email(inbox, uid, message, auto_respond: bool, summarize_emails: bool, podcast: bool):
    '''Save, optionally summarize/voice and auto-respond to one message, then mark it seen.'''
    recipients = [Contact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to]
    this_email = EmailModel(
        sender=message.sent_from[0]['email'],
        datetime_received=localize_dt(message.date),
        recipients=recipients,
        subject=message.subject,
        body=_extract_email_body(message),
        attachments=message.attachments
    )

    DEBUG(f"\n\nProcessing email: {this_email.subject}\n\n")
    md_path, md_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".md")
    tts_path, tts_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".wav")
    if summarize_emails:
        email_content = f'At {this_email.datetime_received}, {this_email.sender} sent an email with the subject line "{this_email.subject}". The email in its entirety reads: \n\n{this_email.body}\n"'
        if this_email.attachments:
            attachment_texts = await extract_attachments(this_email.attachments)
            email_content += "\n—--\n" + "\n—--\n".join([f"Attachment: {text}" for text in attachment_texts])

        summary = await summarize.summarize_text(email_content)
        await tts.local_tts(text_content = summary, speed = 1.1, voice = DEFAULT_VOICE, podcast = podcast, output_path = tts_path)

        if podcast:
            if PODCAST_DIR.exists():
                tts.copy_to_podcast_dir(tts_path)
            else:
                ERR(f"PODCAST_DIR does not exist: {PODCAST_DIR}")

        save_email_as_markdown(this_email, summary, md_path, tts_relative)
    else:
        save_email_as_markdown(this_email, None, md_path, None)

    if auto_respond and should_auto_respond(this_email):
        DEBUG(f"Auto-responding to {this_email.subject}")
        auto_response_subject = 'Auto-Response Re:' + this_email.subject
        auto_response_body = await generate_auto_response_body(this_email)
        DEBUG(f"Auto-response: {auto_response_body}")
        await send_auto_response(this_email.sender, auto_response_subject, auto_response_body)

    inbox.mark_seen(uid)


async def process_unread_emails(auto_respond: bool = AUTORESPOND, summarize_emails: bool = True, podcast: bool = True):
    '''
    Poll the inbox forever, handling every unread message and then sleeping 30 seconds.

    Args:
        auto_respond (bool): Send an auto-response when ``should_auto_respond`` allows it.
        summarize_emails (bool): Summarize each email and synthesize the summary to audio.
        podcast (bool): Copy the audio into PODCAST_DIR when that directory exists.
    '''
    while True:
        try:
            with imap_conn() as inbox:
                unread_messages = inbox.messages(unread=True)
                for uid, message in unread_messages:
                    # A failure on one message must not block the rest of the
                    # batch; the message stays unread and is retried next cycle.
                    try:
                        await _handle_unread_email(inbox, uid, message, auto_respond, summarize_emails, podcast)
                    except Exception as e:
                        ERR(f"An error occurred: {e}")

            await asyncio.sleep(30)
        except Exception as e:
            ERR(f"An error occurred: {e}")
            await asyncio.sleep(30)
|
||||
|
||||
|
||||
def save_email_as_markdown(email: EmailModel, summary: str, md_path: Path, tts_path: Path):
    '''
    Saves an email as a markdown file at the given path.

    Args:
        email (EmailModel): The email object containing email details.
        summary (str): The summary of the email, or None to omit the summary callout.
        md_path (Path): Destination of the markdown file.
        tts_path (Path): Path of the text-to-speech audio to embed, or None to omit it.
    '''
    # Only quote-prefix a summary that exists; callers pass None when
    # summarization is disabled.
    if summary:
        summary = prefix_lines(summary, '> ')

    # Link each recipient by address, matching how the sender is linked;
    # formatting the Contact object itself would emit its field repr.
    recipient_links = ', '.join([f'**[[{recipient.email}]]**' for recipient in email.recipients])

    # Create the markdown content
    markdown_content = f'''---
date: {email.datetime_received.strftime('%Y-%m-%d')}
tags:
- email
---
| | | |
| --: | :--: | :--: |
| *received* | **{email.datetime_received.strftime('%B %d, %Y at %H:%M:%S %Z')}** | |
| *from* | **[[{email.sender}]]** | |
| *to* | {recipient_links} | |
| *subject* | **{email.subject}** | |
'''

    if summary:
        markdown_content += f'''
> [!summary] Summary
> {summary}
'''

    if tts_path:
        markdown_content += f'''
![[{tts_path}]]
'''

    markdown_content += f'''
---
{email.body}
'''

    with open(md_path, 'w', encoding='utf-8') as md_file:
        md_file.write(markdown_content)

    DEBUG(f"Saved markdown to {md_path}")
|
||||
|
||||
|
||||
# System prompt for the model that drafts auto-responses.
AUTORESPONSE_SYS = "You are a helpful AI assistant that generates personalized auto-response messages to incoming emails."

async def generate_auto_response_body(e: EmailModel, response_style: str = "professional") -> str:
    '''
    Ask the LLM to draft an auto-response to the given email.

    Args:
        e (EmailModel): The email being answered.
        response_style (str): Adjective inserted into the prompt to set the tone.

    Returns:
        str: The generated response, or a fixed apology if generation fails.
    '''
    age = dt_datetime.now(TZ) - e.datetime_received
    prompt = f'''
Please generate a personalized auto-response to the following email. The email is from {e.sender} and was sent {age} ago with the subject line "{e.subject}." You are auto-responding on behalf of {USER_FULLNAME}, who is described by the following short bio (strictly for your context -- do not recite this in the response): "{USER_BIO}." {USER_FULLNAME} is unable to respond himself, because {AUTORESPONSE_CONTEXT}. Everything from here to ~~//END//~~ is the email body.
{e.body}
~~//END//~~
Keep your auto-response {response_style} and to the point, but do aim to make it responsive specifically to the sender's inquiry.
'''

    try:
        response = await llm.query_ollama(prompt, AUTORESPONSE_SYS, 400)
        return response
    # Named `err` so it does not rebind (and then delete) the email parameter `e`.
    except Exception as err:
        ERR(f"Error generating auto-response: {str(err)}")
        return "Thank you for your email. Unfortunately, an error occurred while generating the auto-response. We apologize for any inconvenience."
|
||||
|
||||
async def send_auto_response(to_email, subject, body):
    '''
    Send a plain-text auto-response over SMTP-over-SSL using the IMAP account credentials.

    Args:
        to_email: Recipient address.
        subject: Subject line.
        body: Plain-text body.

    Raises:
        Exception: Re-raises whatever building or sending the message raised, after logging it.
    '''
    def _deliver(message):
        # NOTE(security): certificate verification is disabled here. That may be
        # deliberate (e.g. a local mail bridge with a self-signed certificate);
        # switch to ssl.create_default_context() if the server has a valid one.
        context = ssl._create_unverified_context()
        with SMTP_SSL(IMAP.host, IMAP.smtp_port, context=context) as server:
            server.login(IMAP.email, IMAP.password)
            DEBUG(f"Successfully logged in to {IMAP.host} at {IMAP.smtp_port} as {IMAP.email}. Attempting to send email now.")
            server.send_message(message)

    try:
        message = MIMEMultipart()
        message['From'] = IMAP.email  # smtp_username
        message['To'] = to_email
        message['Subject'] = subject
        message.attach(MIMEText(body, 'plain'))

        DEBUG(f"Initiating attempt to send auto-response via SMTP at {IMAP.host}:{IMAP.smtp_port}...")
        # smtplib is blocking; run it in the default executor so the event loop
        # stays responsive while the message is sent.
        await asyncio.get_running_loop().run_in_executor(None, _deliver, message)
        INFO(f"Auto-response sent to {to_email} concerning {subject}")

    except Exception as e:
        # Logged once here; the bare raise keeps the original traceback.
        ERR(f"Failed to send auto-response email to {to_email}: {e}")
        raise
|
||||
|
||||
def should_auto_respond(email: EmailModel) -> bool:
    '''
    Decide whether an email qualifies for an auto-response.

    With a non-empty whitelist, at least one whitelist entry must match. In every
    case, a matching blacklist entry vetoes the response. Entries containing '@'
    are matched as substrings of the sender; all others are matched
    case-insensitively against the subject and body.
    '''
    def matches_list(item: str, email: EmailModel) -> bool:
        if '@' in item:
            return item in email.sender
        needle = item.lower()
        return needle in email.subject.lower() or needle in email.body.lower()

    blacklisted = any(matches_list(entry, email) for entry in AUTORESPONSE_BLACKLIST or ())

    if AUTORESPONSE_WHITELIST:
        whitelisted = any(matches_list(entry, email) for entry in AUTORESPONSE_WHITELIST)
        return whitelisted and not blacklisted
    return not blacklisted
|
||||
|
||||
# Strong references to background tasks. The event loop holds only weak
# references, so an unreferenced task can be garbage-collected mid-run.
_EMAIL_BACKGROUND_TASKS = set()

@email.on_event("startup")
async def startup_event():
    '''Start the inbox polling loop as a background task when the app starts.'''
    task = asyncio.create_task(process_unread_emails())
    _EMAIL_BACKGROUND_TASKS.add(task)
    task.add_done_callback(_EMAIL_BACKGROUND_TASKS.discard)
|
973
sijapi/routers/ig.py
Normal file
973
sijapi/routers/ig.py
Normal file
|
@ -0,0 +1,973 @@
|
|||
'''
|
||||
IN DEVELOPMENT: Instagram AI bot module.
|
||||
'''
|
||||
from fastapi import APIRouter, UploadFile
|
||||
import os
|
||||
import io
|
||||
import copy
|
||||
import re
|
||||
import jwt
|
||||
import json
|
||||
from tqdm import tqdm
|
||||
import pyotp
|
||||
import time
|
||||
import pytz
|
||||
import requests
|
||||
import tempfile
|
||||
import random
|
||||
import subprocess
|
||||
import urllib.request
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter
|
||||
from time import sleep
|
||||
from datetime import timedelta, datetime as date
|
||||
from PIL import Image
|
||||
from pydantic import BaseModel
|
||||
from typing import Dict, List, Optional
|
||||
import instagrapi
|
||||
from instagrapi import Client as igClient
|
||||
from instagrapi.types import UserShort
|
||||
from urllib.parse import urlparse
|
||||
from instagrapi.exceptions import LoginRequired as ClientLoginRequiredError
|
||||
import json
|
||||
from ollama import Client as oLlama
|
||||
from sd import sd
|
||||
from dotenv import load_dotenv
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, COMFYUI_DIR
|
||||
|
||||
import io
|
||||
from io import BytesIO
|
||||
import base64
|
||||
|
||||
ig = APIRouter()
|
||||
|
||||
class IG_Request(BaseModel):
    """Options controlling one run of the Instagram bot."""
    # Upload a particular file to Instagram.
    file: Optional[UploadFile] = None
    # Profile account to use (short names defined per folder and in the config file).
    profile: Optional[str] = None
    # Overrides all other settings so images are generated locally and stay local.
    local_only: Optional[bool] = False
    # OpenAI API key; if included, used for DALL-E, GPT-4 and GPT-4-Vision unless otherwise overridden.
    openai: Optional[str] = None
    # A valid OpenAI model name is used as-is; otherwise matched to an Ollama model, if one exists.
    llm: Optional[str] = "llama3"
    # Set to GPT-4-Vision for the OpenAI image-to-text model; otherwise matched to a vision-capable Ollama model.
    i2t: Optional[str] = "llava"
    # Set to DALL-E for the OpenAI model, or use it to override the selected StableDiffusion workflow.
    # Leave blank to use the defaults from the config file.
    t2i: Optional[str] = None
    # If given a value, used as the post category; if not, all categories are used unless ig_comment_only is enabled.
    # NOTE(review): the default is the bool True although the annotation is Optional[str].
    ig_post: Optional[str] = True
    # If given a value, used as the comment category; if not, all categories are used unless ig_post_only is enabled.
    ig_comment: Optional[str] = None
    # Target a particular user for comments.
    ig_comment_user: Optional[str] = None
    # Target a particular Instagram URL for comments.
    ig_comment_url: Optional[str] = None
    # Enable posting to Ghost.
    ghost_post: Optional[bool] = True
    # Average duration of short intervals (a few seconds is adequate; simulates doomscrolling latency).
    sleep_short: Optional[int] = 5
    # Average duration of long intervals (about a minute at least; simulates writing a comment or preparing a post).
    sleep_long: Optional[int] = 180
|
||||
|
||||
# Module-level configuration for the Instagram bot, evaluated at import time.
# NOTE(review): BASE_DIR, PROFILE, args and OpenAI are used below but are not
# defined or imported in the visible part of this file - confirm where they
# are meant to come from before relying on this module.

# Settings read from environment variables.
IG_PROFILE = os.getenv("IG_PROFILE")
IG_SHORT_SLEEP = int(os.getenv("IG_SHORT_SLEEP", 5))
IG_LONG_SLEEP = int(os.getenv("IG_LONG_SLEEP", 180))
IG_POST_GHOST = os.getenv("IG_POST_GHOST")
IG_VISION_LLM = os.getenv("IG_VISION_LLM")
IG_PROMPT_LLM = os.getenv("IG_PROMPT_LLM")
IG_IMG_GEN = os.getenv("IG_IMG_GEN", "ComfyUI")
IG_OUTPUT_PLATFORMS = os.getenv("IG_OUTPUT_PLATFORMS", "ig,ghost,obsidian").split(',')
# NOTE(review): COMFYUI_DIR is joined with os.path.join here but with the `/`
# operator on the next line; the latter requires a path-like object.
SD_WORKFLOWS_DIR = os.path.join(COMFYUI_DIR, 'workflows')
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output'
# Per-profile directory layout.
IG_PROFILES_DIR = os.path.join(BASE_DIR, 'profiles')
IG_PROFILE_DIR = os.path.join(IG_PROFILES_DIR, PROFILE)
IG_IMAGES_DIR = os.path.join(IG_PROFILE_DIR, 'images')
IG_PROFILE_CONFIG_PATH = os.path.join(IG_PROFILE_DIR, f'config.json')
IG_VIEWED_IMAGES_DIR = os.path.join(IG_PROFILE_DIR, 'downloads')

# Load the profile's JSON configuration; most settings below are read from it.
with open(IG_PROFILE_CONFIG_PATH, 'r') as config_file:
    PROFILE_CONFIG = json.load(config_file)

# Make sure the profile's image directory exists.
if not os.path.exists(IG_IMAGES_DIR):
    os.makedirs(IG_IMAGES_DIR )

OPENAI_API_KEY=PROFILE_CONFIG.get("openai_key")


###################
### VALIDATION ###
##################

# These checks only print a message; they do not stop execution.
if args.profile and args.posttype and not args.custompost and not args.posttype in PROFILE_CONFIG["posts"]:
    print ("ERROR: NO SUCH POST TYPE IS AVAILABLE FOR THIS PROFILE.")

if args.profile and args.commenttype and not args.commenttype in PROFILE_CONFIG["comments"]:
    print ("ERROR: NO SUCH COMMENT TYPE IS AVAILABLE FOR THIS PROFILE.")


####################
### CLIENT SETUP ###
####################

# Instagram client shared by the helper functions below.
cl = igClient(request_timeout=1)


IMG_GEN = OpenAI(api_key=OPENAI_API_KEY)
IMG_MODEL = "dall-e-3"

COMFYUI_URL = "http://localhost:8188"
CLIENT_ID = str(uuid.uuid4())


###############################
### INSTAGRAM & GHOST SETUP ###
###############################
IG_USERNAME = PROFILE_CONFIG.get("ig_name")
IG_PASSWORD = PROFILE_CONFIG.get("ig_pass")
IG_SECRET_KEY = PROFILE_CONFIG.get("ig_2fa_secret")
IG_SESSION_PATH = os.path.join(IG_PROFILE_DIR, f'credentials.json')

GHOST_API_URL=PROFILE_CONFIG.get("ghost_admin_url")
GHOST_API_KEY=PROFILE_CONFIG.get("ghost_admin_api_key")
GHOST_CONTENT_KEY=PROFILE_CONFIG.get("ghost_content_key")

########################
### LLM PROMPT SETUP ###
########################
IMG_PROMPT_SYS = PROFILE_CONFIG.get("img_prompt_sys")
IMG_DESCRIPTION_SYS = PROFILE_CONFIG.get("img_description_sys")
COMMENT_PROMPT_SYS = PROFILE_CONFIG.get("img_comment_sys")
HASHTAGS = PROFILE_CONFIG.get("preferred_hashtags", [])
IMAGE_URL = args.image_url
# NOTE(review): looks like a Unix timestamp; its use is not visible here.
rollover_time = 1702605780
COMPLETED_MEDIA_LOG = os.path.join(IG_PROFILE_DIR, f'completed-media.txt')
# One-time-password generator built from the profile's 2FA secret.
TOTP = pyotp.TOTP(IG_SECRET_KEY)
# Sleep durations passed to sleep() by the helper functions below.
SHORT = args.shortsleep
LONG = args.longsleep
|
||||
|
||||
|
||||
def follow_by_username(username) -> bool:
    """
    Follow the account named *username*.

    Resolves the username to a user id, pauses via ``sleep(SHORT)``, then
    returns the result of ``cl.user_follow``.
    """
    target_id = cl.user_id_from_username(username)
    sleep(SHORT)
    outcome = cl.user_follow(target_id)
    return outcome
|
||||
|
||||
def unfollow_by_username(username) -> bool:
    """
    Unfollow the account named *username*.

    Resolves the username to a user id, pauses via ``sleep(SHORT)``, then
    returns the result of ``cl.user_unfollow``.
    """
    target_id = cl.user_id_from_username(username)
    sleep(SHORT)
    outcome = cl.user_unfollow(target_id)
    return outcome
|
||||
|
||||
def get_poster_of_post(shortcode):
    """Return the username of the account that published the post *shortcode*."""
    return cl.media_info_by_shortcode(shortcode).user.username
|
||||
|
||||
|
||||
def get_followers(amount: int = 0) -> Dict[int, UserShort]:
    """
    Return the bot account's followers as a mapping of user id to user object.

    *amount* is forwarded unchanged to ``cl.user_followers``.
    """
    own_id = cl.user_id
    return cl.user_followers(own_id, amount=amount)
|
||||
|
||||
|
||||
def get_followers_usernames(amount: int = 0) -> List[str]:
    """
    Return the usernames of the bot account's followers.

    Fetches the followers mapping, pauses via ``sleep(SHORT)``, then collects
    the ``username`` of every returned user.
    """
    follower_map = cl.user_followers(cl.user_id, amount=amount)
    sleep(SHORT)
    usernames = []
    for follower in follower_map.values():
        usernames.append(follower.username)
    return usernames
|
||||
|
||||
def get_following(amount: int = 0) -> Dict[int, UserShort]:
    """
    Return the accounts the bot follows as a mapping of user id to user object.

    Pauses via ``sleep(SHORT)`` before issuing the request.
    """
    sleep(SHORT)
    own_id = cl.user_id
    return cl.user_following(own_id, amount=amount)
|
||||
|
||||
|
||||
def get_user_media(username, amount=30):
    """
    Fetch up to *amount* recent medias of *username* and keep those whose
    ``media_type`` is 1.

    Returns a list of media objects; a ``sleep(SHORT)`` pause runs for every
    fetched media, kept or not.
    """
    DEBUG(f"Fetching recent media for {username}...")
    owner_id = cl.user_id_from_username(username)
    kept = []
    for item in cl.user_medias(owner_id, amount):
        sleep(SHORT)
        if item.media_type != 1:
            continue
        kept.append(item)
    return kept
|
||||
|
||||
|
||||
def get_user_image_urls(username, amount=30) -> List[str]:
    """
    Return the thumbnail URLs of *username*'s recent medias.

    Only medias with ``media_type == 1`` and a non-empty ``thumbnail_url``
    contribute; a ``sleep(SHORT)`` pause runs for every fetched media.
    """
    DEBUG(f"Fetching recent media URLs for {username}...")
    owner_id = cl.user_id_from_username(username)
    thumbnails = []
    for item in cl.user_medias(owner_id, amount):
        sleep(SHORT)
        if item.media_type != 1 or not item.thumbnail_url:
            continue
        thumbnails.append(item.thumbnail_url)
    return thumbnails
|
||||
|
||||
def is_valid_url(url):
    """Return True when *url* parses with both a scheme and a network location.

    Any error raised while parsing is treated as "not valid".
    """
    try:
        parts = urlparse(url)
    except Exception:
        return False
    return bool(parts.scheme and parts.netloc)
|
||||
|
||||
def get_random_follower():
    """Return the username of one randomly chosen follower of the bot account.

    Raises:
        IndexError: if the account has no followers (from ``random.choice``).
    """
    # get_followers_usernames is a function defined in this module; the
    # original code looked it up on the client object `cl` instead.
    followers = get_followers_usernames()
    sleep(SHORT)
    return random.choice(followers)
|
||||
|
||||
|
||||
def get_medias_by_hashtag(hashtag: str, days_ago_max:int = 14, ht_type:str = None, amount:int = args.count, max_attempts: int = 5):
    """Collect medias for *hashtag* that pass ``filter_medias``.

    Args:
        hashtag: Hashtag name passed to the client.
        days_ago_max: Forwarded to ``filter_medias``.
        ht_type: ``"top"`` or ``"recent"``; falls back to ``args.commentmode``.
        amount: Number of filtered medias wanted; each fetch asks the client
            for ``amount * 10`` medias.
        max_attempts: Upper bound on fetch rounds. Without a bound the loop
            would never end when too few medias pass the filter.

    Returns:
        The filtered medias gathered so far. The list may be longer than
        *amount*, or shorter when *max_attempts* was exhausted.

    Raises:
        ValueError: if the resolved *ht_type* is neither "top" nor "recent".
    """
    if not ht_type:
        ht_type = args.commentmode
    if ht_type == "top":
        fetch = cl.hashtag_medias_top
    elif ht_type == "recent":
        fetch = cl.hashtag_medias_recent
    else:
        # Previously an unknown type fetched nothing and looped forever.
        raise ValueError(f"Unknown hashtag media type: {ht_type!r}")

    DEBUG(f"Fetching {ht_type} media for hashtag: {hashtag}")
    # Keyed by pk so repeated fetches do not accumulate duplicates.
    unique_medias = {}
    filtered_medias = []
    for _ in range(max_attempts):
        sleep(SHORT)
        for media in fetch(name=hashtag, amount=amount*10):
            unique_medias.setdefault(media.pk, media)

        filtered_medias = filter_medias(list(unique_medias.values()), days_ago_max=days_ago_max)
        DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(filtered_medias)}")

        if len(filtered_medias) >= amount:
            DEBUG(f"Desired amount of {amount} filtered media reached.")
            break
    else:
        DEBUG(f"Stopping after {max_attempts} attempts with {len(filtered_medias)} filtered media for '#{hashtag}'.")

    return filtered_medias
|
||||
|
||||
def get_medias_from_all_hashtags(days_ago_max=14, ht_type:str = None, amount:int = args.count, max_attempts: int = 20):
    """Collect up to *amount* filtered medias from randomly chosen hashtags.

    Each round picks a random entry of ``HASHTAGS``, fetches 50 medias of the
    requested kind and keeps those that pass ``filter_medias``.

    Args:
        days_ago_max: Forwarded to ``filter_medias``.
        ht_type: ``"top"`` or ``"recent"``; falls back to ``args.commentmode``.
        amount: Maximum number of medias returned.
        max_attempts: Upper bound on fetch rounds, so the loop ends even when
            the hashtags never yield enough matching medias.

    Returns:
        A list of at most *amount* medias, without duplicates.

    Raises:
        ValueError: if ``HASHTAGS`` is empty or *ht_type* is unknown.
    """
    if not ht_type:
        ht_type = args.commentmode
    if ht_type == "top":
        fetch = cl.hashtag_medias_top
    elif ht_type == "recent":
        fetch = cl.hashtag_medias_recent
    else:
        # Previously an unknown type fetched nothing and looped forever.
        raise ValueError(f"Unknown hashtag media type: {ht_type!r}")
    if not HASHTAGS:
        raise ValueError("No preferred hashtags are configured for this profile.")

    DEBUG(f"Fetching {ht_type} media.")
    filtered_medias = []
    seen_pks = set()
    for _ in range(max_attempts):
        if len(filtered_medias) >= amount:
            break
        hashtag = random.choice(HASHTAGS)
        DEBUG(f"Using hashtag: {hashtag}")
        sleep(SHORT)
        # Fetch a large batch to filter from.
        fetched_medias = fetch(name=hashtag, amount=50)

        current_filtered_medias = []
        for media in filter_medias(fetched_medias, days_ago_max=days_ago_max):
            # The same hashtag can be drawn twice; keep each media once.
            if media.pk not in seen_pks:
                seen_pks.add(media.pk)
                current_filtered_medias.append(media)
        filtered_medias.extend(current_filtered_medias)
        DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(current_filtered_medias)}")

        if len(filtered_medias) >= amount:
            # Trim the list if we've collected more than needed.
            filtered_medias = filtered_medias[:amount]
            DEBUG(f"Desired amount of {amount} filtered media reached.")
            break
        DEBUG(f"Total filtered media count so far: {len(filtered_medias)}")

    return filtered_medias
|
||||
|
||||
def filter_medias(
    medias: List,
    like_count_min=None,
    like_count_max=None,
    comment_count_min=None,
    comment_count_max=None,
    days_ago_max=None,
):
    """Return the medias that satisfy every given bound and are not yet completed.

    A bound left as ``None`` is not applied. ``days_ago_max`` keeps only medias
    whose ``taken_at`` is later than "now minus that many days"; medias without
    ``taken_at`` are dropped when the bound is set. Medias already recorded in
    the completed log (``check_media_in_completed_lists``) are always dropped.
    """
    # Compare against None explicitly: days_ago_max=0 is a valid bound and
    # previously left the cutoff unset, which then failed on `taken_at > None`.
    cutoff = None
    if days_ago_max is not None:
        cutoff = date.now(pytz.utc) - timedelta(days=days_ago_max)

    def _keep(media):
        if like_count_min is not None and media.like_count < like_count_min:
            return False
        if like_count_max is not None and media.like_count > like_count_max:
            return False
        if comment_count_min is not None and media.comment_count < comment_count_min:
            return False
        if comment_count_max is not None and media.comment_count > comment_count_max:
            return False
        if cutoff is not None and not (media.taken_at and media.taken_at > cutoff):
            return False
        # Checked last because it reads the completed-media log.
        return not check_media_in_completed_lists(media)

    return [media for media in medias if _keep(media)]
|
||||
|
||||
def add_media_to_completed_lists(media):
    """
    Record *media* as handled by appending its pk, on its own line, to the
    completed-media log.
    """
    entry = f"{str(media.pk)}\n"
    with open(COMPLETED_MEDIA_LOG, 'a') as log_file:
        log_file.write(entry)
|
||||
|
||||
|
||||
def check_media_in_completed_lists(media):
    """
    Return True when *media*'s pk is already listed in the completed-media log.

    A log file that does not exist yet means nothing has been completed, so
    the answer is False rather than an error.
    """
    try:
        with open(COMPLETED_MEDIA_LOG, 'r') as file:
            completed_media = set(file.read().splitlines())
    except FileNotFoundError:
        # The log is only created by the first add_media_to_completed_lists call.
        return False
    return str(media.pk) in completed_media
|
||||
|
||||
|
||||
|
||||
def download_and_resize_image(url: str, download_path: str = None, max_dimension: int = 1200) -> str:
    """Download the image at *url*, shrink it if needed and save it to disk.

    Args:
        url: Image address; non-string values are converted with ``str()``.
        download_path: Target file. When it is empty or its directory does not
            exist, a temporary file is used instead.
        max_dimension: Largest allowed width/height; bigger images are scaled
            down with their aspect ratio preserved.

    Returns:
        The path the image was written to, or ``None`` when the URL has no
        scheme/host or when downloading, decoding or saving fails.
    """
    if not isinstance(url, str):
        url = str(url)
    parsed_url = urlparse(url)
    if not (url and parsed_url.scheme and parsed_url.netloc):
        return None

    using_temp_file = False
    if not download_path or not os.path.isdir(os.path.dirname(download_path)):
        _, temp_file_extension = os.path.splitext(parsed_url.path)
        if not temp_file_extension:
            temp_file_extension = ".jpg"  # Default extension if none is found
        # mkstemp creates the file atomically, unlike the deprecated mktemp.
        handle, download_path = tempfile.mkstemp(suffix=temp_file_extension, prefix="download_")
        os.close(handle)
        using_temp_file = True

    try:
        # A timeout keeps a stalled server from blocking the run forever.
        with requests.get(url, timeout=30) as response:
            response.raise_for_status()  # Raises an HTTPError if the response was an error
            image = Image.open(BytesIO(response.content))

            # Resize the image, preserving aspect ratio
            if max(image.size) > max_dimension:
                image.thumbnail((max_dimension, max_dimension))

            # Preserve the original format if known; "JPEG" is the name the
            # imaging library registers for JPEG files ("jpg" is not one).
            image_format = image.format if image.format else "JPEG"
            image.save(download_path, image_format)

        return download_path
    except Exception as e:
        DEBUG(f"Error downloading or resizing image: {e}")
        if using_temp_file and os.path.exists(download_path):
            # Do not leave an empty placeholder file behind.
            os.remove(download_path)
        return None
|
||||
|
||||
|
||||
def comment_on_user_media(user: str, comment_type: str = "default", amount=5):
    """
    Comment on a user's media.

    For each recent media of *user* that is not yet in the completed log, the
    thumbnail is downloaded, a comment is generated by the vision model and
    posted, and the media is recorded as completed.

    Args:
        user: Username whose medias are fetched via ``get_user_media``.
        comment_type: Key into ``PROFILE_CONFIG['comments']`` that supplies the
            user prompt (``img_comment_usr``).
        amount: Number of medias requested from ``get_user_media``.
    """
    comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr']
    medias = get_user_media(user, amount)
    for media in medias:
        if check_media_in_completed_lists(media):
            DEBUG(f"Media already interacted with: {media.pk}")
            continue

        sleep(SHORT)
        if not (media.thumbnail_url and is_valid_url(media.thumbnail_url)):
            DEBUG(f"URL for {media.pk} disappeared it seems...")
            continue

        media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
        if media_path is None:
            DEBUG(f"We received a nonetype! {media_path}")
            continue

        encoded_media = encode_image_to_base64(media_path)
        comment_text = llava(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)
        if comment_text:
            # Same handling as the other comment_on_* functions: a refused
            # comment is logged and must not abort the remaining medias.
            try:
                cl.media_comment(media.pk, comment_text)
                DEBUG(f"Commented on media: {media.pk}")
            except instagrapi.exceptions.FeedbackRequired as e:
                DEBUG(f"Cannot comment on media {media.pk}: {str(e)}")
        else:
            DEBUG(f"Failed to generate comment for media: {media.pk}")
        add_media_to_completed_lists(media)
        sleep(SHORT)
|
||||
|
||||
def comment_on_hashtagged_media(comment_type: str = args.commenttype, amount=3, hashtag: str = None, like_positive: bool = False):
    """
    Comment on a hashtag's media.

    Args:
        comment_type: Key into ``PROFILE_CONFIG['comments']``.
        amount: Number of filtered medias requested for the hashtag.
        hashtag: Hashtag to use; when omitted, one is drawn at random from the
            comment type's ``hashtags`` list.
        like_positive: When True, also like each media if the comment type's
            ``sentiment`` is "positive". Defaults to False, which matches the
            previous behaviour where the like step was switched off by an
            always-false condition.
    """
    comment_config = PROFILE_CONFIG['comments'][comment_type]
    if not hashtag:
        hashtag = random.choice(comment_config['hashtags'])

    medias = get_medias_by_hashtag(hashtag=hashtag, days_ago_max=7, amount=amount)

    for media in medias:
        if check_media_in_completed_lists(media):
            DEBUG(f"Media already interacted with: {media.pk}")
            continue

        media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
        comment_text = None

        if media_path and os.path.exists(media_path):
            encoded_media = encode_image_to_base64(media_path)
            # A missing caption is treated as empty text so the concatenation cannot fail.
            comment_prompt_usr = comment_config['img_comment_usr'] + " For reference, here is the description that was posted with this image: " + (media.caption_text or "")
            comment_text = llava(encoded_media, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)

        if like_positive and comment_config.get('sentiment') == "positive":
            try:
                # NOTE(review): passes the media's id rather than the whole
                # object - confirm against the client's media_like signature.
                like_result = cl.media_like(media.id)
                if like_result:
                    DEBUG(f"Liked media: https://instagram.com/p/{media.pk}/")
            except instagrapi.exceptions.FeedbackRequired as e:
                DEBUG(f"Cannot like media {media.pk}: {str(e)}")

        if comment_text:
            try:
                cl.media_comment(media.pk, comment_text)
                DEBUG(f"Commented on media: https://instagram.com/p/{media.pk}/")
            except instagrapi.exceptions.FeedbackRequired as e:
                DEBUG(f"Cannot comment on media {media.pk}: {str(e)}")
        else:
            DEBUG(f"Failed to generate comment for media: https://instagram.com/p/{media.pk}")
        add_media_to_completed_lists(media)
        sleep(SHORT)
|
||||
|
||||
|
||||
def comment_on_specific_media(media_url, comment_type: str = "default"):
    """
    Comment on a specific media given its URL.

    Args:
        media_url: Address of the post; resolved to a media pk by the client.
        comment_type: Key into ``PROFILE_CONFIG['comments']`` that supplies the
            user prompt (``img_comment_usr``).
    """
    media_id = cl.media_pk_from_url(media_url)
    sleep(SHORT)
    media = cl.media_info(media_id)
    sleep(SHORT)

    media_path = download_and_resize_image(media.thumbnail_url, f"{IG_VIEWED_IMAGES_DIR}/{media.pk}.jpg")
    if not media_path:
        # download_and_resize_image returns None on failure; there is no
        # image to describe in that case.
        DEBUG(f"Failed to generate comment for specific media: https://instagram.com/p/{media.pk}/")
        return
    encoded_media = encode_image_to_base64(media_path)

    # A missing caption is treated as empty text so the concatenation cannot fail.
    comment_prompt_usr = PROFILE_CONFIG['comments'][comment_type]['img_comment_usr'] + " For reference, here is the description that was posted with this image: " + (media.caption_text or "")
    comment_text = llava(encoded_media, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)

    if not comment_text:
        DEBUG(f"Failed to generate comment for specific media: https://instagram.com/p/{media.pk}/")
        return

    try:
        cl.media_comment(media.pk, comment_text)
        DEBUG(f"Commented on specific media: https://instagram.com/p/{media.pk}/")
    except instagrapi.exceptions.FeedbackRequired as e:
        DEBUG(f"Failed to comment on specific media: https://instagram.com/p/{media.pk}/ due to error: {str(e)}")
|
||||
|
||||
|
||||
|
||||
def get_image(status_data, key):
    """Extract the filename and subfolder from the status data and read the file.

    Args:
        status_data: Job record as returned by ``poll_status``; its
            ``outputs[key]["images"]`` list describes the generated files.
        key: Output node key whose first image is read.

    Returns:
        The raw bytes of the first image listed for *key*.

    Raises:
        Exception: when no image is listed, when the image entry has no
            filename, or when the file is missing on disk.
    """
    outputs = status_data.get("outputs", {})
    images_info = outputs.get(key, {}).get("images", [])
    if not images_info:
        raise Exception("No images found in the job output.")

    image_info = images_info[0]  # Assuming the first image is the target
    filename = image_info.get("filename")
    if not filename:
        # .get() never raises KeyError, so the missing key is reported here.
        raise Exception("Failed to extract image information due to missing key: 'filename'")
    subfolder = image_info.get("subfolder", "")  # Default to empty if not present
    file_path = os.path.join(COMFYUI_OUTPUT_DIR, subfolder, filename)

    try:
        with open(file_path, 'rb') as file:
            return file.read()
    except FileNotFoundError as e:
        raise Exception(f"File {filename} not found at the expected path {file_path}") from e
|
||||
|
||||
|
||||
def update_prompt(workflow: dict, post: dict, positive: str, found_key=None, path=None):
    """Fill the placeholders of a workflow in place and locate its save node.

    Walks *workflow* recursively (dicts and lists) and rewrites:
      * values equal to "API_PPrompt" -> ``post["API_PPrompt"] + positive``
      * values equal to "API_SPrompt" / "API_NPrompt" -> the matching *post* entry
      * "seed" / "noise_seed" keys -> a fresh random integer
      * width-like / height-like keys holding 1023 or 1025 -> ``post["width"]``
        / ``post["height"]`` (1024 when absent)

    Args:
        workflow: Workflow structure; modified in place.
        post: Post configuration supplying prompts and dimensions.
        positive: Text appended to the positive prompt.
        found_key: Internal one-element list shared across the recursion;
            callers normally leave it unset.
        path: Internal list of keys leading to *workflow*, used in error logs.

    Returns:
        The key of the dict whose ``class_type`` is "SaveImage" and whose
        ``inputs.filename_prefix`` is "API_", or ``None`` if there is none.
    """
    # A fresh holder per top-level call: a list literal as the default would
    # be shared between calls and leak the previous workflow's key.
    if found_key is None:
        found_key = [None]
    if path is None:
        path = []

    try:
        if isinstance(workflow, dict):
            for key, value in workflow.items():
                current_path = path + [key]

                if isinstance(value, dict):
                    if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
                        found_key[0] = key
                    update_prompt(value, post, positive, found_key, current_path)
                elif isinstance(value, list):
                    # Recursive call with updated path for each item in a list
                    for index, item in enumerate(value):
                        update_prompt(item, post, positive, found_key, current_path + [str(index)])

                if value == "API_PPrompt":
                    workflow[key] = post.get(value, "") + positive
                    DEBUG(f"Updated API_PPrompt to: {workflow[key]}")
                elif value == "API_SPrompt":
                    workflow[key] = post.get(value, "")
                    DEBUG(f"Updated API_SPrompt to: {workflow[key]}")
                elif value == "API_NPrompt":
                    workflow[key] = post.get(value, "")
                    DEBUG(f"Updated API_NPrompt to: {workflow[key]}")
                elif key in ("seed", "noise_seed"):
                    workflow[key] = random.randint(1000000000000, 9999999999999)
                    DEBUG(f"Updated seed to: {workflow[key]}")
                # 1023 / 1025 act as "replace me" markers for dimensions.
                elif key in ("width", "max_width", "scaled_width", "side_length") and value in (1023, 1025):
                    workflow[key] = post.get("width", 1024)
                elif key in ("dimension", "height", "max_height", "scaled_height") and value in (1023, 1025):
                    workflow[key] = post.get("height", 1024)
    except Exception as e:
        DEBUG(f"Error in update_prompt at path {' -> '.join(map(str, path))}: {e}")
        raise

    return found_key[0]
|
||||
|
||||
def update_prompt_custom(workflow: dict, API_PPrompt: str, API_SPrompt: str, API_NPrompt: str, found_key=None, path=None):
    """Fill the placeholders of a workflow in place from explicit prompt strings.

    Walks *workflow* recursively (dicts and lists) and rewrites:
      * values equal to "API_PPrompt" / "API_SPrompt" / "API_NPrompt" -> the
        argument of the same name
      * "seed" / "noise_seed" keys -> a fresh random integer
      * width-like / height-like keys holding 1023 or 1025 -> 1024

    Args:
        workflow: Workflow structure; modified in place.
        API_PPrompt: Replacement for the positive-prompt placeholder.
        API_SPrompt: Replacement for the "API_SPrompt" placeholder.
        API_NPrompt: Replacement for the negative-prompt placeholder.
        found_key: Internal one-element list shared across the recursion;
            callers normally leave it unset.
        path: Internal list of keys leading to *workflow*, used in error logs.

    Returns:
        The key of the dict whose ``class_type`` is "SaveImage" and whose
        ``inputs.filename_prefix`` is "API_", or ``None`` if there is none.
    """
    # A fresh holder per top-level call: a list literal as the default would
    # be shared between calls and leak the previous workflow's key.
    if found_key is None:
        found_key = [None]
    if path is None:
        path = []

    try:
        if isinstance(workflow, dict):
            for key, value in workflow.items():
                current_path = path + [key]

                if isinstance(value, dict):
                    if value.get('class_type') == 'SaveImage' and value.get('inputs', {}).get('filename_prefix') == 'API_':
                        found_key[0] = key
                    # Recurse into this function (not update_prompt, whose
                    # signature takes a post dict and fewer arguments).
                    update_prompt_custom(value, API_PPrompt, API_SPrompt, API_NPrompt, found_key, current_path)
                elif isinstance(value, list):
                    # Recursive call with updated path for each item in a list
                    for index, item in enumerate(value):
                        update_prompt_custom(item, API_PPrompt, API_SPrompt, API_NPrompt, found_key, current_path + [str(index)])

                if value == "API_PPrompt":
                    workflow[key] = API_PPrompt
                    DEBUG(f"Updated API_PPrompt to: {workflow[key]}")
                elif value == "API_SPrompt":
                    workflow[key] = API_SPrompt
                    DEBUG(f"Updated API_SPrompt to: {workflow[key]}")
                elif value == "API_NPrompt":
                    workflow[key] = API_NPrompt
                    DEBUG(f"Updated API_NPrompt to: {workflow[key]}")
                elif key in ("seed", "noise_seed"):
                    workflow[key] = random.randint(1000000000000, 9999999999999)
                    DEBUG(f"Updated seed to: {workflow[key]}")
                # 1023 / 1025 act as "replace me" markers for dimensions; the
                # key lists match the ones used by update_prompt.
                elif key in ("width", "max_width", "scaled_width", "side_length") and value in (1023, 1025):
                    workflow[key] = 1024
                elif key in ("dimension", "height", "max_height", "scaled_height") and value in (1023, 1025):
                    workflow[key] = 1024
    except Exception as e:
        DEBUG(f"Error in update_prompt_custom at path {' -> '.join(map(str, path))}: {e}")
        raise

    return found_key[0]
|
||||
|
||||
|
||||
##################################
|
||||
### IMAGE GENERATION FUNCTIONS ###
|
||||
##################################
|
||||
|
||||
|
||||
def image_gen(prompt: str, model: str):
    """Generate one 1024x1024 image for *prompt* with the OpenAI client.

    The URL of the first result is handed to ``download_and_resize_image``;
    that function's return value (a local path, or None) is returned.
    """
    generation = IMG_GEN.images.generate(
        model=model,
        prompt=prompt,
        size="1024x1024",
        quality="standard",
        n=1,
    )
    return download_and_resize_image(generation.data[0].url)
|
||||
|
||||
|
||||
def queue_prompt(prompt: dict):
    """Submit *prompt* to the ComfyUI ``/prompt`` endpoint.

    Returns the ``prompt_id`` from the JSON reply; raises ``Exception`` when
    the server answers with anything other than HTTP 200.
    """
    payload = {"prompt": prompt, "client_id": CLIENT_ID}
    response = requests.post(f"{COMFYUI_URL}/prompt", json=payload)
    if response.status_code != 200:
        raise Exception(f"Failed to queue prompt. Status code: {response.status_code}, Response body: {response.text}")
    return response.json().get('prompt_id')
|
||||
|
||||
def poll_status(prompt_id, timeout=None):
    """Poll the job status until it's complete and return the status data.

    Queries ``{COMFYUI_URL}/history/{prompt_id}`` once per second.

    Args:
        prompt_id: Identifier returned by ``queue_prompt``.
        timeout: Optional maximum number of seconds to wait. ``None`` (the
            default) waits indefinitely, as before.

    Returns:
        The history entry for *prompt_id* once its ``status.completed`` is true.

    Raises:
        Exception: when the history request does not return HTTP 200.
        TimeoutError: when *timeout* is set and elapses before completion.
    """
    start_time = time.time()  # Record the start time
    while True:
        elapsed_time = int(time.time() - start_time)  # Elapsed time in seconds
        status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}")
        # Use \r to return to the start of the line, and end='' to prevent newline
        DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='')
        if status_response.status_code != 200:
            raise Exception("Failed to get job status")
        status_data = status_response.json()
        job_data = status_data.get(prompt_id, {})
        if job_data.get("status", {}).get("completed", False):
            DEBUG()
            DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.")
            return job_data
        if timeout is not None and time.time() - start_time >= timeout:
            raise TimeoutError(f"{prompt_id} did not complete within {timeout} seconds.")
        time.sleep(1)
|
||||
|
||||
################################
|
||||
### PRIMARY ACTIVE FUNCTIONS ###
|
||||
################################
|
||||
|
||||
def load_post(chosen_post: str = "default"):
    """Return the post configuration named *chosen_post* from the profile.

    When the profile has no such post, one is picked with ``choose_post``
    instead and that configuration is returned.
    """
    if chosen_post not in PROFILE_CONFIG['posts']:
        DEBUG(f"Unable to load post for {chosen_post}. Choosing a default post.")
        chosen_post = choose_post(PROFILE_CONFIG['posts'])
        fallback = PROFILE_CONFIG['posts'][chosen_post]
        DEBUG(f"Defaulted to {chosen_post}")
        return fallback

    selected = PROFILE_CONFIG['posts'][chosen_post]
    DEBUG(f"Loaded post for {chosen_post}")
    return selected
|
||||
|
||||
def handle_image_workflow(chosen_post=None):
    """
    Orchestrates the workflow from prompt update, image generation, to either saving the image and description locally
    or posting to Instagram based on the local flag.

    Args:
        chosen_post: Name of the post type to generate; when omitted, one is
            drawn with ``choose_post`` from the profile's posts.
    """
    if chosen_post is None:
        chosen_post = choose_post(PROFILE_CONFIG['posts'])

    post = load_post(chosen_post)

    workflow_name = args.workflow if args.workflow else random.choice(post['workflows'])

    DEBUG(f"Workflow name: {workflow_name}")

    DEBUG(f"Generating image concept for {chosen_post} and {workflow_name} now.")
    image_concept = query_ollama(llmPrompt = post['llmPrompt'], max_tokens = 180) if args.local or not args.openai else query_gpt4(llmPrompt = post['llmPrompt'], max_tokens = 180)

    DEBUG(f"Image concept for {chosen_post}: {image_concept}")

    workflow_data = None

    if args.fast:
        # load_json raises when the "_fast" file is absent; fall back to the
        # standard workflow in that case instead of aborting.
        try:
            workflow_data = load_json(None, f"{workflow_name}_fast")
        except FileNotFoundError:
            DEBUG(f"No fast variant found for {workflow_name}; using the standard workflow.")

    if workflow_data is None:
        workflow_data = load_json(None, workflow_name)

    if args.dalle and not args.local:
        jpg_file_path = image_gen(image_concept, "dall-e-3")
    else:
        saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
        DEBUG(f"Saved file key: {saved_file_key}")
        prompt_id = queue_prompt(workflow_data)
        DEBUG(f"Prompt ID: {prompt_id}")
        status_data = poll_status(prompt_id)
        image_data = get_image(status_data, saved_file_key)
        if chosen_post == "landscape":
            jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 2880, 100)
        else:
            jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 1440, 90)

    if not jpg_file_path:
        # Both image_gen and save_as_jpg return None on failure.
        DEBUG(f"No image was produced for {chosen_post}; skipping post-processing.")
        return

    image_aftergen(jpg_file_path, chosen_post)
|
||||
|
||||
def handle_custom_image(custom_post: str):
    """
    Orchestrates the workflow from prompt update, image generation, to either saving the image and description locally
    or posting to Instagram based on the local flag.

    Args:
        custom_post: Free-form user request; a language model turns it into
            the image concept used as the positive prompt.
    """
    if args.posttype:
        post = load_post(args.posttype)
        workflow_name = args.workflow if args.workflow else random.choice(post['workflows'])
    else:
        workflow_name = args.workflow if args.workflow else "selfie"
        post = {
            "API_PPrompt": "",
            "API_SPrompt": "; (((masterpiece))); (beautiful lighting:1), subdued, fine detail, extremely sharp, 8k, insane detail, dynamic lighting, cinematic, best quality, ultra detailed.",
            "API_NPrompt": "canvas frame, 3d, ((bad art)), illustrated, deformed, blurry, duplicate, bad art, bad anatomy, worst quality, low quality, watermark, FastNegativeV2, (easynegative:0.5), epiCNegative, easynegative, verybadimagenegative_v1.3",
            "Vision_Prompt": "Write an upbeat Instagram description with emojis to accompany this selfie!",
            "frequency": 2,
            "ghost_tags": [
                "aigenerated",
                "stablediffusion",
                "sdxl",
            ],
        }

    # Resolved before branching so that both generation paths can use it.
    chosen_post = args.posttype if args.posttype else "custom"

    workflow_data = load_json(None, workflow_name)

    system_msg = "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words."
    image_concept = query_ollama(system_msg=system_msg, user_msg=custom_post, max_tokens = 180) if args.local or not args.openai else query_gpt4(system_msg=system_msg, user_msg=custom_post, max_tokens = 180)

    DEBUG(f"Image concept: {image_concept}")

    if args.dalle and not args.local:
        jpg_file_path = image_gen(image_concept, "dall-e-3")
    else:
        saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
        DEBUG(f"Saved file key: {saved_file_key}")

        prompt_id = queue_prompt(workflow_data)
        DEBUG(f"Prompt ID: {prompt_id}")

        status_data = poll_status(prompt_id)
        image_data = get_image(status_data, saved_file_key)
        jpg_file_path = save_as_jpg(image_data, prompt_id, chosen_post, 1440, 90)

    if not jpg_file_path:
        # Both image_gen and save_as_jpg return None on failure.
        DEBUG("No image was produced for the custom post; skipping post-processing.")
        return

    # Hand over the post actually used: the built-in custom post is not part
    # of PROFILE_CONFIG['posts'], so a lookup by name alone would fail.
    image_aftergen(jpg_file_path, chosen_post, post=post, prompt=post.get("Vision_Prompt"))
|
||||
|
||||
|
||||
def image_aftergen(jpg_file_path: str, chosen_post: str = None, post: Dict = None, prompt: str = None):
    """Describe, title, archive and publish a freshly generated image.

    Steps, in order: ask the vision model for a description and a title,
    write a ``.txt`` and a ``.md`` file next to the image, optionally set the
    desktop wallpaper, and, unless running locally, upload to Instagram and
    to Ghost (each can be switched off with ``args.noig`` / ``args.noghost``).

    Args:
        jpg_file_path: Path of the generated JPEG.
        chosen_post: Post type name; used to look up the vision prompt and
            the Ghost tags in the profile when they are not passed in.
        post: Post configuration supplying ``ghost_tags``.
        prompt: Vision prompt for the description.
    """
    if chosen_post and not prompt:
        prompt = PROFILE_CONFIG['posts'][chosen_post]['Vision_Prompt']

    def _strip_wrapping_quotes(text):
        # Drop one pair of quotes wrapped around the model's answer.
        return re.sub(r'^["\'](.*)["\']$', r'\1', text)

    encoded_string = encode_image_to_base64(jpg_file_path)
    DEBUG(f"Image successfully encoded from {jpg_file_path}")

    if args.local or args.llava or not args.openai:
        raw_description = llava(encoded_string, prompt)
    else:
        raw_description = gpt4v(encoded_string, prompt, 150)
    instagram_description = _strip_wrapping_quotes(raw_description)

    if post:
        ghost_tags = post['ghost_tags']
    else:
        ghost_tags = PROFILE_CONFIG['posts'][chosen_post]['ghost_tags']

    title_prompt = f"Generate a short 3-5 word title for this image, which already includes the following description: {instagram_description}"

    # The title comes from the same model family as the description.
    if args.local or args.llava or not args.openai:
        raw_title = llava(encoded_string, title_prompt)
    else:
        raw_title = gpt4v(encoded_string, title_prompt, 150)
    img_title = _strip_wrapping_quotes(raw_title)

    # Both side files share the image's name, with a different extension.
    base_name = jpg_file_path.rsplit('.', 1)[0]

    description_path = os.path.join(IG_IMAGES_DIR, base_name + ".txt")
    with open(description_path, "w") as desc_file:
        desc_file.write(instagram_description)

    markdown_filename = base_name + ".md"
    markdown_content = f"""# {img_title}


---
{instagram_description}
---
Tags: {', '.join(ghost_tags)}
"""
    with open(markdown_filename, "w") as md_file:
        md_file.write(markdown_content)

    DEBUG(f"Markdown file created at {markdown_filename}")

    if args.wallpaper:
        change_wallpaper(jpg_file_path)
        DEBUG(f"Wallpaper changed.")

    if args.local:
        return

    ig_footer = ""
    if not args.noig:
        post_url = upload_photo(jpg_file_path, instagram_description)
        DEBUG(f"Image posted at {post_url}")
        ig_footer = f"\n<a href=\"{post_url}\">Instagram link</a>"

    if not args.noghost:
        ghost_text = f"{instagram_description}"
        ghost_url = post_to_ghost(img_title, jpg_file_path, ghost_text, ghost_tags)
        DEBUG(f"Ghost post: {ghost_url}\n{ig_footer}")
|
||||
|
||||
|
||||
def choose_post(posts):
    """Pick a post type at random, weighted by each entry's ``frequency``.

    A number between 1 and the sum of all frequencies is drawn; the first post
    type whose running frequency total reaches that number is returned.
    """
    total_weight = sum(info['frequency'] for info in posts.values())
    ticket = random.randint(1, total_weight)

    running_total = 0
    for post_type in posts:
        running_total += posts[post_type]['frequency']
        if ticket <= running_total:
            return post_type
|
||||
|
||||
def load_json(json_payload, workflow):
    """Return parsed JSON from an inline payload or from a workflow file.

    A non-empty *json_payload* wins and is parsed directly. Otherwise
    *workflow* names a file inside ``SD_WORKFLOWS_DIR`` (".json" is appended
    when missing). With neither given, ``ValueError`` is raised.
    """
    if json_payload:
        return json.loads(json_payload)
    if not workflow:
        raise ValueError("No valid input provided.")

    file_name = workflow if workflow.endswith('.json') else f"{workflow}.json"
    workflow_path = os.path.join(SD_WORKFLOWS_DIR, file_name)
    with open(workflow_path, 'r') as handle:
        return json.load(handle)
|
||||
|
||||
|
||||
|
||||
|
||||
def save_as_jpg(image_data, prompt_id, chosen_post:str = None, max_size=2160, quality=80):
    """Store raw image bytes as ``<prompt_id>.jpg`` under the post's image folder.

    The bytes are first written as a temporary PNG, reopened, scaled down when
    the longer side exceeds *max_size*, converted to RGB and saved as JPEG with
    the given *quality*; the PNG is then deleted.

    Returns the JPEG path, or ``None`` after logging if any step fails.
    """
    category = chosen_post if chosen_post else "custom"
    category_dir = os.path.join(IG_IMAGES_DIR, category)
    png_path = os.path.join(category_dir, f"{prompt_id}.png")

    try:
        os.makedirs(category_dir, exist_ok=True)

        # Dump the raw bytes so the imaging library can open them from disk.
        with open(png_path, 'wb') as raw_file:
            raw_file.write(image_data)

        with Image.open(png_path) as img:
            longest_side = max(img.size)
            if longest_side > max_size:
                scale = max_size / longest_side
                scaled_size = tuple(int(side * scale) for side in img.size)
                img = img.resize(scaled_size, Image.Resampling.LANCZOS)

            jpg_path = os.path.join(category_dir, f"{prompt_id}.jpg")
            img.convert('RGB').save(jpg_path, format='JPEG', quality=quality)

        # The PNG was only an intermediate file.
        os.remove(png_path)

        return jpg_path
    except Exception as e:
        DEBUG(f"Error processing image: {e}")
        return None
|
||||
|
||||
|
||||
def upload_photo(path, caption, title: str=None):
    """Upload the photo at *path* with *caption* and return the post's URL.

    *title* is accepted but not used.
    """
    DEBUG(f"Uploading photo from {path}...")
    uploaded = cl.photo_upload(path, caption)
    return f"https://www.instagram.com/p/{uploaded.code}/"
|
||||
|
||||
def format_duration(seconds):
    """Format a number of seconds as a short "sec" / "min" / "hr" string."""
    if seconds >= 6400:
        # NOTE(review): 6400 looks like it may have been meant as a round
        # number of minutes or hours (e.g. 7200) - confirm.
        return f"{seconds / 3600:.2f} hr"
    if seconds >= 120:
        return f"{int(seconds // 60)} min"
    return f"{int(seconds)} sec"
|
||||
|
||||
########################
|
||||
### HELPER FUNCTIONS ###
|
||||
########################
|
||||
|
||||
import subprocess
|
||||
|
||||
def change_wallpaper(image_path):
    """Set the desktop picture to *image_path* by running ``osascript``.

    The command is run without a shell and the path is escaped for the
    AppleScript string literal, so quotes or spaces in the path cannot break
    or alter the command.
    """
    # Backslash and double quote are the characters that would terminate or
    # change the quoted AppleScript string.
    escaped_path = str(image_path).replace("\\", "\\\\").replace('"', '\\"')
    script = f'tell application "Finder" to set desktop picture to POSIX file "{escaped_path}"'
    subprocess.run(["osascript", "-e", script])
|
||||
|
||||
|
||||
def sleep(seconds):
    """Pause for a random duration between 0.66x and 1.5x of *seconds*."""
    shortest = seconds*0.66
    longest = seconds*1.5
    sleepupto(shortest, longest)
|
||||
|
||||
def sleepupto(min_seconds, max_seconds=None):
    """Sleep for a random interval while showing a progress bar.

    With both arguments the interval is drawn uniformly from
    ``[min_seconds, max_seconds]``; with one argument it is drawn from
    ``[0, min_seconds]``. The wait proceeds in steps of at most one second.
    """
    if max_seconds is None:
        lower, upper = 0, min_seconds
    else:
        lower, upper = min_seconds, max_seconds
    interval = random.uniform(lower, upper)

    started = time.time()
    deadline = started + interval

    with tqdm(total=interval, desc=f"Sleeping for {format_duration(interval)}", unit=" sec", ncols=75, bar_format='{desc}: {bar} {remaining}') as pbar:
        now = time.time()
        while now - started < interval:
            # Never sleep past the end of the interval.
            step = min(1, interval - (now - started))
            time.sleep(step)
            pbar.update(step)
            # The remaining time shown is the value measured before this step.
            pbar.set_postfix_str(f"{format_duration(deadline - now)} remaining")
            now = time.time()
|
||||
|
||||
|
||||
########################
|
||||
### GHOST FUNCTIONS ###
|
||||
########################
|
||||
|
||||
|
||||
|
||||
def generate_jwt_token():
    """Build a five-minute HS256 JWT from ``GHOST_API_KEY`` (``"<key id>:<hex secret>"``).

    Returns:
        The encoded token as a ``str``.
    """
    key_id, key_secret = GHOST_API_KEY.split(':')
    issued_at = int(date.now().timestamp())
    claims = {
        'iat': issued_at,
        'exp': issued_at + 5 * 60,  # expires five minutes after issue
        'aud': '/admin/'
    }
    signing_key = bytes.fromhex(key_secret)
    token = jwt.encode(claims, signing_key, algorithm='HS256', headers={'kid': key_id})
    # jwt.encode may hand back bytes or str; always return str.
    if isinstance(token, bytes):
        return token.decode('utf-8')
    return token
|
||||
|
||||
|
||||
def post_to_ghost(title, image_path, html_content, ghost_tags):
    """Upload an image and publish a post embedding it through ``GHOST_API_URL``.

    Args:
        title: Title of the new post.
        image_path: Local path of the image to upload.
        html_content: HTML placed after the uploaded image in the post body.
        ghost_tags: Value sent as the post's ``tags`` field.

    Returns:
        The ``url`` of the created post as reported by the API.

    Raises:
        requests.HTTPError: If either request returns an error status.
    """
    auth_headers = {'Authorization': f'Ghost {generate_jwt_token()}'}

    # Step 1: upload the image and read back its hosted URL.
    with open(image_path, 'rb') as image_file:
        upload = requests.post(
            f"{GHOST_API_URL}/images/upload/",
            headers=auth_headers,
            files={'file': (os.path.basename(image_path), image_file, 'image/jpg')},
        )
    upload.raise_for_status()
    image_url = upload.json()['images'][0]['url']

    # Step 2: wrap image + caller HTML in a single mobiledoc HTML card.
    html_card = {"cardName": "html", "html": f'<img src="{image_url}" alt="Image"/><hr/> {html_content}'}
    mobiledoc = json.dumps({
        "version": "0.3.1",
        "atoms": [],
        "cards": [["html", html_card]],
        "markups": [],
        "sections": [[10, 0]]
    })

    # Step 3: create the post as already published.
    payload = {
        'posts': [{
            'title': title,
            'mobiledoc': mobiledoc,
            'status': 'published',
            'tags': ghost_tags
        }]
    }
    created = requests.post(f"{GHOST_API_URL}/posts/", json=payload, headers=auth_headers)
    created.raise_for_status()
    return created.json()['posts'][0]['url']
|
||||
|
||||
|
||||
|
||||
########################################################
|
||||
@ig.post("/ig/flow")
async def ig_flow_endpoint(new_session: bool = False):
    """Prepare the ``cl`` client session: reuse the saved one or log in afresh.

    Args:
        new_session: When true, skip any saved session and log in with a TOTP code.

    Raises:
        Exception: If no saved session was loaded and no login succeeded.
    """
    # Seconds left in the current 30-second window counted from rollover_time.
    now_unix = int(date.now().timestamp())
    seconds_left = 30 - ((now_unix - rollover_time) % 30)

    if seconds_left < 4:
        DEBUG("Too close to end of TOTP counter. Waiting.")
        # NOTE(review): sleepupto blocks with time.sleep inside an async handler.
        sleepupto(5, 5)

    if not new_session and os.path.exists(IG_SESSION_PATH):
        cl.load_settings(IG_SESSION_PATH)
        DEBUG("Loaded past session.")
        return

    # NOTE(review): with new_session=False and no saved session file, no login
    # is attempted and the exception below is raised — confirm this is intended.
    logged_in = new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now())
    if not logged_in:
        raise Exception(f"Failed to login as {IG_USERNAME}.")

    cl.dump_settings(IG_SESSION_PATH)
    DEBUG("Logged in and saved new session.")
|
484
sijapi/routers/llm.py
Normal file
484
sijapi/routers/llm.py
Normal file
|
@ -0,0 +1,484 @@
|
|||
#routers/llm.py
|
||||
from fastapi import APIRouter, HTTPException, Request, Response
|
||||
from fastapi.responses import StreamingResponse, JSONResponse
|
||||
from starlette.responses import StreamingResponse
|
||||
from datetime import datetime as dt_datetime
|
||||
from dateutil import parser
|
||||
from typing import List, Dict, Any, Union
|
||||
from pydantic import BaseModel, root_validator, ValidationError
|
||||
import aiofiles
|
||||
import os
|
||||
import glob
|
||||
import chromadb
|
||||
from openai import OpenAI
|
||||
import uuid
|
||||
import json
|
||||
import base64
|
||||
from pathlib import Path
|
||||
import ollama
|
||||
from ollama import AsyncClient as Ollama, list as OllamaList
|
||||
import aiofiles
|
||||
import time
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from fastapi import FastAPI, Request, HTTPException, APIRouter
|
||||
from fastapi.responses import JSONResponse, StreamingResponse
|
||||
from dotenv import load_dotenv
|
||||
from sijapi import BASE_DIR, DATA_DIR, LOGS_DIR, CONFIG_DIR, LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi.utilities import convert_to_unix_time, sanitize_filename
|
||||
|
||||
# Router on which this module's endpoints are registered.
llm = APIRouter()

# Initialize chromadb client
client = chromadb.Client()
# NOTE(review): this rebinds OBSIDIAN_CHROMADB_COLLECTION, which is also
# imported from sijapi at the top of the file; the imported value is discarded.
OBSIDIAN_CHROMADB_COLLECTION = client.create_collection("obsidian")
|
||||
|
||||
# Function to read all markdown files in the folder
|
||||
def read_markdown_files(folder: Path):
    """Read every ``*.md`` file directly inside ``folder`` as UTF-8 text.

    Returns:
        A ``(documents, file_paths)`` tuple of parallel lists: the text of each
        file and the path it was read from, in ``glob`` order.
    """
    file_paths = glob.glob(os.path.join(folder, "*.md"))

    def _read_text(md_path):
        with open(md_path, 'r', encoding='utf-8') as handle:
            return handle.read()

    return [_read_text(md_path) for md_path in file_paths], file_paths
|
||||
|
||||
# Read markdown files and generate embeddings
|
||||
# Runs at import time: every markdown file in DOC_DIR is embedded with the
# "mxbai-embed-large" model and stored in the collection, keyed by file path.
documents, file_paths = read_markdown_files(DOC_DIR)
for i, doc in enumerate(documents):
    # One synchronous embeddings call per document.
    response = ollama.embeddings(model="mxbai-embed-large", prompt=doc)
    embedding = response["embedding"]
    OBSIDIAN_CHROMADB_COLLECTION.add(
        ids=[file_paths[i]],  # documents[i] was read from file_paths[i]
        embeddings=[embedding],
        documents=[doc]
    )
|
||||
|
||||
# Function to retrieve the most relevant document given a prompt
|
||||
@llm.get("/retrieve_document/{prompt}")
async def retrieve_document(prompt: str):
    """Return the stored document whose embedding is closest to ``prompt``.

    Args:
        prompt: Free-text query taken from the URL path.

    Returns:
        ``{"document": <text>}`` for the single best match.

    Raises:
        HTTPException: 404 when the query returns no document (previously this
            surfaced as an unhandled ``IndexError``).
    """
    response = ollama.embeddings(
        prompt=prompt,
        model="mxbai-embed-large"
    )
    results = OBSIDIAN_CHROMADB_COLLECTION.query(
        query_embeddings=[response["embedding"]],
        n_results=1
    )
    # 'documents' holds one list of matches per query embedding.
    matches = results['documents']
    if not matches or not matches[0]:
        raise HTTPException(status_code=404, detail="No matching document found.")
    return {"document": matches[0][0]}
|
||||
|
||||
# Function to generate a response using RAG
|
||||
@llm.get("/generate_response/{prompt}")
async def generate_response(prompt: str):
    """Answer ``prompt`` with the "llama2" model, using the best-matching document as context.

    Returns:
        ``{"response": <generated text>}``.
    """
    context = (await retrieve_document(prompt))['document']
    rag_prompt = f"Using this data: {context}. Respond to this prompt: {prompt}"
    output = ollama.generate(model="llama2", prompt=rag_prompt)
    return {"response": output['response']}
|
||||
|
||||
|
||||
async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, max_tokens: int = 200):
    """Send one system + user exchange to ``DEFAULT_LLM`` and return the reply text.

    Args:
        usr: Content of the user message.
        sys: Content of the system message.
        max_tokens: Passed to the model as the ``num_predict`` option.

    Returns:
        The reply's ``message.content``, or ``None`` when the response has none.
    """
    messages = [{"role": "system", "content": sys},
                {"role": "user", "content": usr}]
    LLM = Ollama()
    response = await LLM.chat(model=DEFAULT_LLM, messages=messages, options={"num_predict": max_tokens})

    DEBUG(response)
    if "message" in response and "content" in response["message"]:
        return response["message"]["content"]

    # Every no-content path is logged and returns None explicitly, including a
    # "message" that lacks "content".
    DEBUG("No choices found in response")
    return None
|
||||
|
||||
def is_vision_request(content):
    """Report whether ``content`` is a list containing an ``image_url`` part.

    NOTE(review): this name is defined a second time further down the module;
    this definition now behaves the same way instead of always returning False.
    """
    if isinstance(content, list):
        return any(isinstance(msg, dict) and msg.get('type') == 'image_url' for msg in content)
    return False
|
||||
|
||||
@llm.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Handle a chat-completions request, streaming or not.

    The raw JSON body is first written to ``REQUESTS_DIR``. If the newest
    message is a user message with an ``image_url`` part it is streamed through
    the "llava" model; otherwise the conversation goes to the requested model,
    streamed when ``stream`` is truthy.

    Raises:
        HTTPException: 400 when the body has no ``messages``.
    """
    body = await request.json()

    # Keep a copy of every incoming request on disk.
    timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S%f")
    filename = REQUESTS_DIR / f"request_{timestamp}.json"
    async with aiofiles.open(filename, mode='w') as file:
        await file.write(json.dumps(body, indent=4))

    messages = body.get('messages')
    if not messages:
        raise HTTPException(status_code=400, detail="Message data is required in the request body.")

    requested_model = body.get('model', 'default-model')
    DEBUG(f"Requested model: {requested_model}")
    stream = body.get('stream')
    token_limit = body.get('max_tokens') or body.get('num_predict')
    # Forward a token limit only when the client supplied one; passing None
    # would override the streaming helpers' own num_predict default.
    stream_kwargs = {"num_predict": token_limit} if token_limit else {}

    # Check if the most recent message contains an image_url
    recent_message = messages[-1]
    if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
        DEBUG("Processing as a vision request")
        model = "llava"
        DEBUG(f"Using model: {model}")
        # The generators emit "data: ...\n\n" frames, i.e. server-sent events.
        return StreamingResponse(stream_messages_with_vision(recent_message, model, **stream_kwargs), media_type="text/event-stream")

    DEBUG("Processing as a standard request")
    model = requested_model
    DEBUG(f"Using model: {model}")
    if stream:
        return StreamingResponse(stream_messages(messages, model, **stream_kwargs), media_type="text/event-stream")

    response_data = await generate_messages(messages, model)
    return JSONResponse(response_data, media_type="application/json")
|
||||
|
||||
async def stream_messages(messages: list, model: str = "llama3", num_predict: int = 300):
    """Stream a chat reply as ``data: <json>`` frames in chat-completion-chunk form.

    Args:
        messages: Chat history passed to the model.
        model: Model name to chat with.
        num_predict: Passed to the model as the ``num_predict`` option.

    Yields:
        One frame per chunk, an error frame if anything raises, then
        ``data: [DONE]``.
    """
    # NOTE(review): assumes the Ollama AsyncClient supports ``async with`` — confirm.
    async with Ollama() as async_client:
        try:
            index = 0
            # AsyncClient.chat is a coroutine: it must be awaited to obtain the
            # async iterator of chunks (the vision variant below already does this).
            async for part in await async_client.chat(model=model, messages=messages, stream=True, options={'num_predict': num_predict}):
                yield "data: " + json.dumps({
                    "id": "chatcmpl-123",
                    "object": "chat.completion.chunk",
                    "created": int(time.time()),
                    "model": model,
                    "system_fingerprint": "fp_44709d6fcb",
                    "choices": [{
                        "index": index,
                        "delta": {"role": "assistant", "content": part['message']['content']},
                        "logprobs": None,
                        "finish_reason": None if 'finish_reason' not in part else part['finish_reason']
                    }]
                }) + "\n\n"
                index += 1
        except Exception as e:
            # Errors are reported in-band because the response has already started.
            yield "data: " + json.dumps({"error": f"Error: {str(e)}"}) + "\n\n"
        yield "data: [DONE]\n\n"
|
||||
|
||||
|
||||
|
||||
async def stream_messages_with_vision(message: dict, model: str, num_predict: int = 300):
    """Stream an image explanation for each base64 ``data:image`` part of ``message``.

    Only list-style content is inspected; parts that are not ``image_url``
    entries carrying a ``data:image`` URL are skipped.

    Yields:
        ``data: <json>`` chunk frames, an error frame if anything raises, then
        ``data: [DONE]``.
    """
    async with Ollama() as async_client:
        try:
            parts = message['content'] if isinstance(message.get('content'), list) else []
            for part in parts:
                if not (part['type'] == 'image_url' and 'url' in part['image_url']):
                    continue
                image_url = part['image_url']['url']
                if not image_url.startswith('data:image'):
                    continue

                # Everything after the 'base64,' marker is the encoded image.
                image_data = base64.b64decode(image_url.split('base64,')[1])
                chunk_stream = await async_client.generate(
                    model=model,
                    prompt='explain this image:',
                    images=[image_data],
                    stream=True,
                    options={'num_predict': num_predict}
                )

                chunk_index = 0
                async for response in chunk_stream:
                    choice = {
                        "index": chunk_index,
                        "delta": {"role": "assistant", "content": response['response']},
                        "logprobs": None,
                        "finish_reason": None if 'finish_reason' not in response else response['finish_reason']
                    }
                    chunk = {
                        "id": "chatcmpl-123",
                        "object": "chat.completion.chunk",
                        "created": int(time.time()),
                        "model": model,
                        "system_fingerprint": "fp_44709d6fcb",
                        "choices": [choice]
                    }
                    yield "data: " + json.dumps(chunk) + "\n\n"
                    chunk_index += 1
        except Exception as e:
            yield "data: " + json.dumps({"error": f"Error: {str(e)}"}) + "\n\n"
        yield "data: [DONE]\n\n"
|
||||
|
||||
|
||||
def get_appropriate_model(requested_model):
    """Map a requested model name onto one that can actually be served.

    "gpt-4-vision-preview" maps to ``DEFAULT_VISION``; a name that
    ``is_model_available`` rejects maps to ``DEFAULT_LLM``; anything else is
    returned unchanged.
    """
    if requested_model == "gpt-4-vision-preview":
        return DEFAULT_VISION
    return requested_model if is_model_available(requested_model) else DEFAULT_LLM
|
||||
|
||||
def is_vision_request(content):
    """Return True when ``content`` is a list holding a dict whose ``type`` is ``image_url``."""
    if not isinstance(content, list):
        return False
    for entry in content:
        if isinstance(entry, dict) and entry.get('type') == 'image_url':
            return True
    return False
|
||||
|
||||
|
||||
@llm.get("/v1/models")
async def get_models():
    """List the models reported by ``OllamaList`` as a model-list JSON payload.

    Each model id is the part of the reported name before the first ``:``.
    """
    formatted_models = [
        {
            "id": model['name'].split(':')[0],
            "object": "model",
            "created": convert_to_unix_time(model['modified_at']),
            "owned_by": "sij"
        }
        for model in OllamaList()['models']
    ]
    return JSONResponse({"object": "list", "data": formatted_models})
|
||||
|
||||
async def generate_messages(messages: list, model: str = "llama3"):
    """Run one non-streaming chat call and wrap the reply in a ``choices`` dict.

    Returns:
        ``{"model": ..., "choices": [{"message": {...}}]}`` on success, or
        ``{"error": "Error: ..."}`` if the call or the reply parsing raises.
    """
    ollama_client = Ollama()
    try:
        reply = await ollama_client.chat(model=model, messages=messages, stream=False)
        assistant_message = {
            "role": "assistant",
            "content": reply['message']['content']
        }
    except Exception as e:
        return {"error": f"Error: {str(e)}"}
    return {"model": model, "choices": [{"message": assistant_message}]}
|
||||
|
||||
|
||||
|
||||
def is_model_available(model_name):
    """Check whether ``model_name`` matches at least one model reported by ``OllamaList``.

    A model matches when its name equals ``model_name`` or starts with
    ``model_name + ':'``. More than one match is logged as ambiguous but still
    counts as available.
    """
    available_models = [entry['name'] for entry in OllamaList()['models']]
    DEBUG(f"Available models: {available_models}")  # Log using the configured LOGGER

    matching_models = [
        name for name in available_models
        if name.startswith(model_name + ':') or name == model_name
    ]
    if not matching_models:
        ERR(f"No match found for model: {model_name}")
        return False

    if len(matching_models) == 1:
        DEBUG(f"Unique match found: {matching_models[0]}")
    else:
        ERR(f"Ambiguous match found, models: {matching_models}")
    return True
|
||||
|
||||
|
||||
@llm.options("/chat/completions")
@llm.options("/v1/chat/completions")
async def chat_completions_options(request: Request):
    """Answer OPTIONS on the chat-completions routes with a usage hint.

    The hint is wrapped in a completion-style JSON body and sent with an
    ``Allow: OPTIONS, POST`` header.
    """
    usage_hint = "To use the chat completions endpoint, make a POST request to /v1/chat/completions with a JSON payload containing the 'messages' array. Each message should have a 'role' (either 'system', 'user', or 'assistant') and 'content' (the message text). You can optionally specify the 'model' to use. The response will be a JSON object containing the generated completions."
    hint_choice = {
        "index": 0,
        "message": {
            "role": "assistant",
            "content": usage_hint
        },
        "finish_reason": "stop"
    }
    payload = {
        "choices": [hint_choice],
        "created": int(time.time()),
        "id": str(uuid.uuid4()),
        "model": DEFAULT_LLM,
        "object": "chat.completion.chunk",
    }
    response_headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Allow": "OPTIONS, POST",
    }
    return JSONResponse(content=payload, status_code=200, headers=response_headers)
|
||||
|
||||
#### EMBEDDINGS
|
||||
|
||||
class EmbeddingRequest(BaseModel):
    """Request body for the embeddings endpoints.

    The text may arrive as ``input`` or as ``prompt``; the pre-validator below
    normalizes both to lists and moves ``input`` into ``prompt`` when only
    ``input`` was supplied.
    """
    model: str
    input: Union[str, List[str], None] = None
    prompt: Union[str, List[str], None] = None

    @root_validator(pre=True)
    def ensure_list(cls, values):
        """Wrap bare strings in a list and fall back from ``input`` to ``prompt``."""
        raw_input, raw_prompt = values.get('input'), values.get('prompt')

        for field_name, raw_value in (('input', raw_input), ('prompt', raw_prompt)):
            if raw_value and isinstance(raw_value, str):
                values[field_name] = [raw_value]

        if raw_input and not raw_prompt:
            # Only ``input`` was given: it becomes the prompt and input is cleared.
            values['prompt'], values['input'] = values['input'], None

        return values
|
||||
|
||||
class EmbeddingResponse(BaseModel):
    """Response schema used by the embeddings endpoints (``response_model``)."""
    # Envelope type; create_embedding sets it to "list".
    object: str
    # Embedding entries; create_embedding emits dicts with "object", "index" and "embedding".
    data: List[Dict[str, Any]]
    # Name of the model that produced the embedding.
    model: str
    # Token counters; create_embedding fills "prompt_tokens" and "total_tokens".
    usage: Dict[str, int]
|
||||
|
||||
@llm.post("/api/embeddings", response_model=EmbeddingResponse)
@llm.post("/v1/embeddings", response_model=EmbeddingResponse)
async def create_embedding(request: EmbeddingRequest):
    """Embed the request text with ``ollama.embeddings`` and return it in list form.

    All prompt strings are joined with spaces and embedded as one text, so the
    response always contains a single embedding at index 0.

    Raises:
        HTTPException: 400 when neither ``input`` nor ``prompt`` carries text
            (previously a ``TypeError`` from ``join`` surfaced as a 500);
            422 on a validation error; 500 on any other failure.
    """
    # Checked before the try block so the generic handler below does not
    # convert this client error into a 500.
    if not request.prompt:
        raise HTTPException(status_code=400, detail="Either 'input' or 'prompt' is required.")

    try:
        combined_input = " ".join(request.prompt)
        response = ollama.embeddings(model=request.model, prompt=combined_input)
        embedding_list = response.get("embedding", [])

        data = [{
            "object": "embedding",
            "index": 0,
            "embedding": embedding_list
        }]

        result = {
            "object": "list",
            "data": data,
            "model": request.model,
            "usage": {"prompt_tokens": 5, "total_tokens": 5}  # Example token counts, not measured
        }

        return result
    except ValidationError as e:
        raise HTTPException(status_code=422, detail=e.errors())
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@llm.options("/api/embeddings")
@llm.options("/v1/embeddings")
async def options_embedding():
    """Answer OPTIONS on the embeddings routes with an empty body and allow headers."""
    allow_headers = {
        "Allow": "OPTIONS, POST",
        "Content-Type": "application/json",
        "Access-Control-Allow-Methods": "OPTIONS, POST",
        "Access-Control-Allow-Headers": "Content-Type"
    }
    return JSONResponse(content={}, headers=allow_headers)
|
||||
|
||||
|
||||
|
||||
|
||||
###### PORTED FROM IGBOT, NEEDS TO BE UPDATED FOR THIS ENVIRONMENT AND MADE ASYNC: #####
|
||||
|
||||
def query_gpt4(llmPrompt: List = None, system_msg: str = "", user_msg: str = "", max_tokens: int = 150):
    """Send a chat request to the "gpt-4" model and return the reply text.

    Args:
        llmPrompt: Ready-made message list; when empty or omitted, a list is
            built from ``system_msg`` and ``user_msg``.
        system_msg: System message used when ``llmPrompt`` is not given.
        user_msg: User message used when ``llmPrompt`` is not given.
        max_tokens: Completion token limit.

    Returns:
        The first choice's message content, or ``""`` when the response has no
        choices.
    """
    messages = llmPrompt if llmPrompt else [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_msg}
    ]
    LLM = OpenAI(api_key=OPENAI_API_KEY)
    response = LLM.chat.completions.create(
        model="gpt-4",
        messages=messages,
        max_tokens=max_tokens
    )
    if hasattr(response, "choices") and response.choices:  # 'choices' exists and is not empty
        first_choice = response.choices[0]
        if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
            return first_choice.message.content

        DEBUG("No content attribute in the first choice's message")
        DEBUG(f"No content found in message string: {response.choices}")
        DEBUG("Trying again!")
        # The retry now returns its result and passes max_tokens by keyword;
        # it used to land in the system_msg slot and the result was dropped.
        # NOTE(review): the retry is unbounded — consider capping attempts.
        return query_gpt4(messages, max_tokens=max_tokens)

    DEBUG(f"No content found in message string: {response}")
    return ""
|
||||
|
||||
def llava(image_base64, prompt):
    """Ask the local "llava" model about an image and return its answer.

    Args:
        image_base64: Image payload forwarded in the ``images`` list.
        prompt: Instruction appended to the fixed chat preamble.

    Returns:
        The model's response text, or ``""`` when that text contains "pass"
        (case-insensitive).
    """
    # ``Ollama`` in this module is an alias for the async client, whose
    # generate() returns a coroutine; this synchronous function needs the
    # blocking client instead.
    VISION_LLM = ollama.Client(host='http://localhost:11434')
    response = VISION_LLM.generate(
        model='llava',
        prompt=f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
        images=[image_base64]
    )
    DEBUG(response)
    return "" if "pass" in response["response"].lower() else response["response"]
|
||||
|
||||
def _first_message_content(response):
    """Return the first choice's non-empty message content, or None."""
    if response and response.choices:
        message = response.choices[0].message
        if message and message.content:
            return message.content
    return None


def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
    """Generate a caption/comment for an image in two passes: draft, then refine.

    Args:
        image_base64: Base64 image data embedded in a ``data:image/jpg`` URL.
        prompt_sys: Text appended to the fixed system preamble.
        prompt_usr: User instruction sent alongside the image.
        max_tokens: Completion token limit for each request.

    Returns:
        The refined text, or ``""`` when either pass contains "PASS". If a pass
        yields no content, the function waits 15 seconds and starts over.
    """
    VISION_LLM = OpenAI(api_key=OPENAI_API_KEY)
    system_message = {
        "role": "system",
        "content": f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt_sys}",
    }
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": f"{prompt_usr}"},
            {"type": "image_url", "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}}
        ],
    }

    # Pass 1: draft.
    response_1 = VISION_LLM.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[system_message, user_message],
        max_tokens=max_tokens,
        stream=False
    )
    comment_content = _first_message_content(response_1)

    if comment_content:
        if "PASS" in comment_content:
            return ""
        DEBUG(f"Generated comment: {comment_content}")

        # Pass 2: feed the draft back and ask for a clean, ready-to-post version.
        response_2 = VISION_LLM.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[
                system_message,
                user_message,
                {
                    "role": "assistant",
                    "content": comment_content
                },
                {
                    "role": "user",
                    "content": "Please refine it, and remember to ONLY include the caption or comment, nothing else! That means no preface, no postscript, no notes, no reflections, and not even any acknowledgment of this follow-up message. I need to be able to use your output directly on social media. Do include emojis though."
                }
            ],
            max_tokens=max_tokens,
            stream=False
        )
        final_content = _first_message_content(response_2)
        if final_content:
            DEBUG(f"Generated comment: {final_content}")
            return "" if "PASS" in final_content else final_content

    DEBUG("Vision response did not contain expected data.")
    DEBUG(f"Vision response: {response_1}")
    # The un-awaited asyncio.sleep here never paused; a synchronous function
    # needs a blocking sleep.
    # NOTE(review): the retry is unbounded — consider capping attempts.
    time.sleep(15)

    try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
    return try_again
|
1081
sijapi/routers/note.py
Normal file
1081
sijapi/routers/note.py
Normal file
File diff suppressed because it is too large
Load diff
16
sijapi/routers/rag.py
Normal file
16
sijapi/routers/rag.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
'''
|
||||
IN DEVELOPMENT: Retrieval-Augmented Generation module.
|
||||
NOTES: Haven't yet decided whether this should depend on the Obsidian and Chat modules, whether they should depend on it, or whether the dependency should run differently for each of them.
|
||||
'''
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
# Router object for this module's RAG endpoints.
rag = APIRouter()
|
||||
|
||||
@rag.get("/rag/search")
async def rag_search_endpoint(query: str, scope: str):
    """Placeholder for the RAG search route; not implemented yet.

    The route call was missing its ``@`` and ran as a bare statement, so the
    handler was never registered on the router.
    """
    pass
|
||||
|
||||
@rag.post("/rag/embed")
async def rag_upload_endpoint(path: str):
    """Placeholder for the RAG embed route; not implemented yet.

    The route call was missing its ``@`` and ran as a bare statement, so the
    handler was never registered on the router.
    """
    pass
|
73
sijapi/routers/serve.py
Normal file
73
sijapi/routers/serve.py
Normal file
|
@ -0,0 +1,73 @@
|
|||
'''
|
||||
Web server module. Used by other modules when serving static content is required, e.g. the sd image generation module. Also used to serve PUBLIC_KEY.
|
||||
'''
|
||||
import os
|
||||
from fastapi import APIRouter, Form, HTTPException, Request, Response
|
||||
from fastapi.responses import FileResponse, PlainTextResponse
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from pathlib import Path
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
|
||||
from sijapi import DATA_DIR, SD_IMAGE_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
|
||||
|
||||
# Router for this module's endpoints; every route on it carries the "public" tag.
serve = APIRouter(tags=["public"])
|
||||
|
||||
@serve.get("/pgp")
async def get_pgp():
    """Serve ``PUBLIC_KEY`` as a plain-text response."""
    key_response = Response(content=PUBLIC_KEY, media_type="text/plain")
    return key_response
|
||||
|
||||
@serve.get("/img/{image_name}")
def serve_image(image_name: str):
    """Serve ``image_name`` from ``SD_IMAGE_DIR``, or an error dict if it does not exist."""
    image_path = os.path.join(SD_IMAGE_DIR, image_name)
    if not os.path.exists(image_path):
        # NOTE(review): this is returned with the default success status, not a 404.
        return {"error": "Image not found"}
    return FileResponse(image_path)
|
||||
|
||||
|
||||
def construct_journal_path(date_str: str) -> Path:
    """Build the vault path of the journal note for a ``YYYY-MM-DD`` date.

    The result has the shape
    ``journal/<year>/<year-month monthname>/<date weekday>/<date weekday>.md``
    under ``OBSIDIAN_VAULT_DIR``.

    Raises:
        HTTPException: 400 when ``date_str`` cannot be parsed.
    """
    try:
        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
        # The day label names both the folder and the note inside it.
        day_label = f'{date_obj:%Y-%m-%d %A}'
        relative_note = f'journal/{date_obj:%Y}/{date_obj:%Y-%m %B}/{day_label}/{day_label}.md'
        return OBSIDIAN_VAULT_DIR / relative_note
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid date format")
|
||||
|
||||
def is_valid_date(date_str: str) -> bool:
    """Return True when ``date_str`` parses with the ``%Y-%m-%d`` format."""
    try:
        datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        return False
    else:
        return True
|
||||
|
||||
@serve.get("/notes/{file_path:path}")
async def get_file(file_path: str):
    """Serve a note or image from the vault.

    A ``YYYY-MM-DD`` value is resolved through ``assemble_journal_path``;
    anything else is taken relative to ``OBSIDIAN_VAULT_DIR`` and gets ``.md``
    appended when it has no suffix. Markdown is returned as plain text;
    png/jpg/jpeg files are returned as files.

    Raises:
        HTTPException: 404 when the file is missing or the requested path
            resolves outside the vault; 400 for unsupported file types; 500
            when a markdown file cannot be read.
    """
    if is_valid_date(file_path):
        absolute_path, local_path = assemble_journal_path(file_path, no_timestamp = True)
    else:
        absolute_path = OBSIDIAN_VAULT_DIR / file_path
        if not absolute_path.suffix:
            absolute_path = absolute_path.with_suffix(".md")

        # file_path comes straight from the URL: refuse anything ("..",
        # absolute paths) that resolves to a location outside the vault.
        vault_root = OBSIDIAN_VAULT_DIR.resolve()
        if vault_root not in absolute_path.resolve().parents:
            raise HTTPException(status_code=404, detail="File not found")

    if not absolute_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")

    if absolute_path.suffix == '.md':
        try:
            with open(absolute_path, 'r', encoding='utf-8') as file:
                content = file.read()
            return PlainTextResponse(content)
        except Exception:
            raise HTTPException(status_code=500, detail="Internal Server Error")
    elif absolute_path.suffix in ['.png', '.jpg', '.jpeg']:
        return FileResponse(absolute_path)
    else:
        raise HTTPException(status_code=400, detail="Unsupported file type")
|
406
sijapi/routers/tts.py
Normal file
406
sijapi/routers/tts.py
Normal file
|
@ -0,0 +1,406 @@
|
|||
from fastapi import APIRouter, UploadFile, HTTPException, Response, Form, File, BackgroundTasks, Depends, Request
|
||||
from fastapi.responses import Response, StreamingResponse, FileResponse
|
||||
from fastapi.responses import StreamingResponse, PlainTextResponse
|
||||
import requests
|
||||
import json
|
||||
import shutil
|
||||
from io import BytesIO
|
||||
import asyncio
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Union, List
|
||||
from pydub import AudioSegment
|
||||
from TTS.api import TTS
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from time import time
|
||||
import torch
|
||||
import traceback
|
||||
import hashlib
|
||||
import uuid
|
||||
import httpx
|
||||
import tempfile
|
||||
import random
|
||||
import re
|
||||
import os
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import HOME_DIR, DATA_DIR, DEFAULT_VOICE, TTS_DIR, TTS_SEGMENTS_DIR, VOICE_DIR, PODCAST_DIR, TTS_OUTPUT_DIR, ELEVENLABS_API_KEY
|
||||
from sijapi.utilities import sanitize_filename
|
||||
|
||||
|
||||
### INITIALIZATIONS ###
|
||||
# Router for this module's endpoints; routes carry the "trusted" and "private" tags.
tts = APIRouter(tags=["trusted", "private"])

# Torch device handle, fixed to the CPU.
DEVICE = torch.device('cpu')

# Model identifier string; presumably the model loaded for local synthesis —
# its use is not visible in this part of the file.
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
|
||||
|
||||
|
||||
@tts.get("/tts/local_voices", response_model=List[str])
async def list_wav_files():
    """List the voice names available as ``.wav`` files in ``VOICE_DIR``.

    Only the final ``.wav`` extension is removed, so a file such as
    ``my.voice.wav`` is listed as ``my.voice`` — the name ``select_voice``
    looks up as ``<name>.wav`` — rather than being truncated at the first dot.
    """
    wav_files = [os.path.splitext(file)[0] for file in os.listdir(VOICE_DIR) if file.endswith(".wav")]
    return wav_files
|
||||
|
||||
@tts.get("/tts/elevenlabs_voices")
async def list_11l_voices():
    """Return the ElevenLabs voices as plain text, one ``name: `voice_id``` line each.

    Any failure is logged and whatever was collected so far (possibly nothing)
    is still returned with status 200.
    """
    lines = []
    url = "https://api.elevenlabs.io/v1/voices"
    headers = {"xi-api-key": ELEVENLABS_API_KEY}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(url, headers=headers)
            DEBUG(f"Response: {response}")
            if response.status_code == 200:
                for voice in response.json().get("voices", []):
                    lines.append(f"{voice['name']}: `{voice['voice_id']}`\n")
        except Exception as e:
            ERR(f"Error determining voice ID: {str(e)}")

    return PlainTextResponse("".join(lines), status_code=200)
|
||||
|
||||
|
||||
|
||||
|
||||
def select_voice(voice_name: str) -> str:
    """Return the path of ``<voice_name>.wav`` inside ``VOICE_DIR``.

    Raises:
        HTTPException: 404 when the file does not exist or the lookup fails.
    """
    try:
        voice_file = VOICE_DIR / f"{voice_name}.wav"
        DEBUG(f"select_voice received query to use voice: {voice_name}. Looking for {voice_file} inside {VOICE_DIR}.")

        if not voice_file.is_file():
            raise HTTPException(status_code=404, detail="Voice file not found")
        return str(voice_file)
    except Exception as e:
        # Also catches the 404 raised above, so every miss is logged once here.
        ERR(f"Voice file not found: {str(e)}")
        ERR(traceback.format_exc())
        raise HTTPException(status_code=404, detail="Voice file not found")
|
||||
|
||||
|
||||
|
||||
@tts.post("/tts/speak")
@tts.post("/v1/audio/speech")
async def generate_speech_endpoint(
    request: Request,
    background_tasks: BackgroundTasks,
    model: str = Form("eleven_turbo_v2"),
    text: Optional[str] = Form(None),
    file: Optional[UploadFile] = File(None),
    voice: Optional[str] = Form(None),
    voice_file: Optional[UploadFile] = File(None),
    speed: Optional[float] = Form(1.1),
    podcast: Union[bool, str] = Form(False),
    stream: bool = Form(True)
):
    """Turn text (form field or uploaded file) into speech.

    With ``stream`` set, audio is streamed from ElevenLabs when the model is
    "eleven_turbo_v2" and from ``stream_tts`` otherwise. Without it, the
    request is handed to ``generate_speech``.

    Raises:
        HTTPException: Errors raised deliberately further down (for example
            400 "No text provided") keep their status; any other failure is
            reported as 500 instead of the former non-standard 666.
    """
    try:
        # Form values arrive as strings; accept "true" in any letter case.
        podcast = podcast if isinstance(podcast, bool) else podcast.lower() == 'true'
        text_content = await get_text_content(text, file)
        if stream:
            model = model if model else await get_model(voice, voice_file)
            if model == "eleven_turbo_v2":
                voice_id = await determine_voice_id(voice)
                audio_stream = await get_audio_stream(model, text_content, voice_id)
                return StreamingResponse(audio_stream, media_type="audio/mpeg")
            else:
                return await stream_tts(text_content, speed, voice, voice_file)
        else:
            return await generate_speech(background_tasks, text_content, voice, voice_file, model, speed, podcast)
    except HTTPException as e:
        ERR(f"Error in TTS: {str(e)}")
        raise
    except Exception as e:
        ERR(f"Error in TTS: {str(e)}")
        ERR(traceback.format_exc())
        raise HTTPException(status_code=500, detail="error in TTS")
|
||||
|
||||
|
||||
async def generate_speech(
    background_tasks: BackgroundTasks,
    text: str,
    voice: str = None,
    voice_file: UploadFile = None,
    model: str = None,
    speed: float = 1.1,
    podcast: bool = False,
    title: str = None,
    output_dir = None
) -> str:
    """Synthesize ``text`` to an audio file and return the file's path as a string.

    "eleven_turbo_v2" goes through ``elevenlabs_tts``; "xtts" goes through
    ``local_tts``, whose output is also scheduled for deletion as a background
    task. When ``model`` is not given it is inferred with ``get_model``.

    Raises:
        HTTPException: 400 for an unknown model; other errors are logged and
            re-raised unchanged.
    """
    output_dir = TTS_OUTPUT_DIR if not output_dir else Path(output_dir)
    if not output_dir.exists():
        output_dir.mkdir(parents=True)

    try:
        if not model:
            model = await get_model(voice, voice_file)

        if model == "eleven_turbo_v2":
            INFO(f"Using ElevenLabs.")
            return str(await elevenlabs_tts(model, text, voice, title, output_dir))

        if model == "xtts":
            INFO(f"Using XTTS2")
            final_output_dir = await local_tts(text, speed, voice, voice_file, podcast, background_tasks, title, output_dir)
            background_tasks.add_task(os.remove, str(final_output_dir))
            return str(final_output_dir)

        raise HTTPException(status_code=400, detail="Invalid model specified")
    except HTTPException as e:
        ERR(f"HTTP error: {e}")
        ERR(traceback.format_exc())
        raise e
    except Exception as e:
        ERR(f"Error: {e}")
        ERR(traceback.format_exc())
        raise e
|
||||
|
||||
|
||||
|
||||
async def get_model(voice: str = None, voice_file: UploadFile = None):
    """Choose the TTS backend from the voice information supplied.

    Returns:
        "xtts" when a voice file is uploaded or ``voice`` names a local wav
        file; "eleven_turbo_v2" when ``voice`` resolves to an ElevenLabs id.

    Raises:
        HTTPException: 400 when neither a voice nor a voice file is given.
    """
    if voice_file:
        return "xtts"

    if voice:
        # select_voice raises 404 for an unknown local voice; without catching
        # it here the ElevenLabs fallback below could never be reached.
        try:
            select_voice(voice)
        except HTTPException:
            DEBUG(f"No local voice file for {voice}; checking ElevenLabs.")
        else:
            return "xtts"

        if await determine_voice_id(voice):
            return "eleven_turbo_v2"

    raise HTTPException(status_code=400, detail="No model or voice specified")
|
||||
|
||||
async def determine_voice_id(voice_name: str) -> str:
    """Resolve a voice name or id to an ElevenLabs voice id.

    Lookup order: the hard-coded table below, then the ElevenLabs voice list
    (matching on ``voice_id`` or ``name``), then a fixed default id.
    """
    hardcoded_voices = {
        "alloy": "E3A1KVbKoWSIKSZwSUsW",
        "echo": "b42GBisbu9r5m5n6pHF7",
        "fable": "KAX2Y6tTs0oDWq7zZXW7",
        "onyx": "clQb8NxY08xZ6mX6wCPE",
        "nova": "6TayTBKLMOsghG7jYuMX",
        "shimmer": "E7soeOyjpmuZFurvoxZ2",
        DEFAULT_VOICE: "6TayTBKLMOsghG7jYuMX",
        "Sangye": "E7soeOyjpmuZFurvoxZ2",
        "Herzog": "KAX2Y6tTs0oDWq7zZXW7",
        "Attenborough": "b42GBisbu9r5m5n6pHF7"
    }

    known_id = hardcoded_voices.get(voice_name)
    if known_id is not None:
        DEBUG(f"Found voice ID - {known_id}")
        return known_id

    DEBUG(f"Requested voice not among the hardcoded options.. checking with 11L next.")
    url = "https://api.elevenlabs.io/v1/voices"
    headers = {"xi-api-key": ELEVENLABS_API_KEY}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(url, headers=headers)
            DEBUG(f"Response: {response}")
            candidates = response.json().get("voices", []) if response.status_code == 200 else []
            for candidate in candidates:
                if voice_name == candidate["voice_id"] or voice_name == candidate["name"]:
                    return candidate["voice_id"]
        except Exception as e:
            ERR(f"Error determining voice ID: {str(e)}")

    # Nothing matched (or the lookup failed): use the fixed default id.
    return "6TayTBKLMOsghG7jYuMX"
|
||||
|
||||
|
||||
async def elevenlabs_tts(model: str, input_text: str, voice: str, title: str = None, output_dir: str = None):
    """Synthesize ``input_text`` with ElevenLabs and save it as an MP3.

    Args:
        model: ElevenLabs model ID sent as ``model_id``.
        input_text: Text to speak.
        voice: Voice name or ID, resolved through ``determine_voice_id``.
        title: Base name for the output file; defaults to a timestamp.
        output_dir: Target directory; defaults to ``TTS_OUTPUT_DIR``.

    Returns:
        Path of the written ``.mp3`` file.

    Raises:
        HTTPException: with the upstream status code when ElevenLabs does
            not answer 200.
    """
    voice_id = await determine_voice_id(voice)

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    payload = {
        "text": input_text,
        "model_id": model
    }
    headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
    async with httpx.AsyncClient() as client:
        response = await client.post(url, json=payload, headers=headers)

    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")

    target_dir = Path(output_dir if output_dir else TTS_OUTPUT_DIR)
    # Create the directory up front (as local_tts does) so the write below
    # cannot fail on a fresh or custom output directory.
    target_dir.mkdir(parents=True, exist_ok=True)

    title = title if title else datetime.now().strftime("%Y%m%d%H%M%S")
    file_path = target_dir / f"{sanitize_filename(title)}.mp3"
    file_path.write_bytes(response.content)
    return file_path
|
||||
|
||||
|
||||
|
||||
|
||||
async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> str:
    """Return the text to synthesize, preferring an uploaded file over ``text``.

    The upload is decoded as UTF-8; either source is stripped of surrounding
    whitespace. Raises HTTP 400 when neither is provided.
    """
    if file:
        raw_bytes = await file.read()
        return raw_bytes.decode("utf-8").strip()

    if text:
        return text.strip()

    raise HTTPException(status_code=400, detail="No text provided")
|
||||
|
||||
|
||||
|
||||
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
    """Resolve the speaker sample to use for voice cloning.

    Args:
        voice: Name of a stored voice; resolved through ``select_voice``.
        voice_file: Uploaded sample, used only when ``voice`` is not given.

    Returns:
        Path (as ``str``) of the sample. An upload is stored in ``VOICE_DIR``;
        if a file with the same name and identical content already exists it
        is reused, otherwise a numbered ``.wav`` name is chosen. With neither
        argument, the default voice is returned.
    """
    if voice:
        return select_voice(voice)

    if voice_file and isinstance(voice_file, UploadFile):
        VOICE_DIR.mkdir(exist_ok=True)

        content = await voice_file.read()
        # MD5 is used only to detect identical uploads, not for security.
        checksum = hashlib.md5(content).hexdigest()

        # The upload filename is client-controlled: keep only its final path
        # component so it cannot point outside VOICE_DIR ("../../x.wav").
        safe_name = Path(voice_file.filename or "").name
        if safe_name in ("", ".", ".."):
            safe_name = "voice.wav"

        existing_file = VOICE_DIR / safe_name
        if existing_file.is_file():
            existing_checksum = hashlib.md5(existing_file.read_bytes()).hexdigest()
            if checksum == existing_checksum:
                return str(existing_file)

        # Same name but different content (or a new name): pick the first
        # free "<stem>NN.wav" slot, or the plain name when it is unused.
        base_name = existing_file.stem
        counter = 1
        new_file = existing_file
        while new_file.is_file():
            new_file = VOICE_DIR / f"{base_name}{counter:02}.wav"
            counter += 1

        new_file.write_bytes(content)
        return str(new_file)

    DEBUG(f"{datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}")
    return select_voice(DEFAULT_VOICE)
|
||||
|
||||
|
||||
async def local_tts(text_content: str, speed: float, voice: str, voice_file = None, podcast: bool = False, background_tasks: BackgroundTasks = None, title: str = None, output_path: Optional[Path] = None) -> str:
    """Synthesize ``text_content`` locally and write one combined WAV file.

    Args:
        text_content: Text to speak; split into segments by ``split_text``.
        speed: Speed factor passed to the model.
        voice: Stored voice name (see ``get_voice_file_path``).
        voice_file: Optional uploaded speaker sample.
        podcast: When true, also export a copy into ``PODCAST_DIR``.
        background_tasks: Accepted for interface compatibility; not used here.
        title: Base name for the output file when ``output_path`` is not given.
        output_path: Explicit destination; overrides the generated name.

    Returns:
        Path of the combined WAV file, as ``str``.
    """
    import uuid

    if output_path:
        file_path = Path(output_path)
    else:
        datetime_str = datetime.now().strftime("%Y%m%d%H%M%S")
        title = sanitize_filename(title) if title else "Audio"
        filename = f"{datetime_str}_{title}.wav"
        file_path = TTS_OUTPUT_DIR / filename

    # Ensure the output and scratch directories exist
    file_path.parent.mkdir(parents=True, exist_ok=True)
    TTS_SEGMENTS_DIR.mkdir(parents=True, exist_ok=True)

    voice_file_path = await get_voice_file_path(voice, voice_file)
    XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
    segments = split_text(text_content)
    combined_audio = AudioSegment.silent(duration=0)

    # Segment names are unique per call so concurrent requests cannot
    # overwrite or delete each other's scratch files.
    run_id = uuid.uuid4().hex

    for i, segment in enumerate(segments):
        segment_file_path = TTS_SEGMENTS_DIR / f"segment_{run_id}_{i}.wav"
        DEBUG(f"Segment file path: {segment_file_path}")
        try:
            segment_file = await asyncio.to_thread(XTTS.tts_to_file, text=segment, speed=speed, file_path=str(segment_file_path), speaker_wav=[voice_file_path], language="en")
            DEBUG(f"Segment file generated: {segment_file}")
            combined_audio += AudioSegment.from_wav(str(segment_file))
        finally:
            # Remove the scratch file whether or not synthesis succeeded.
            segment_file_path.unlink(missing_ok=True)

    if podcast:
        podcast_file_path = PODCAST_DIR / file_path.name
        combined_audio.export(podcast_file_path, format="wav")

    combined_audio.export(file_path, format="wav")
    return str(file_path)
|
||||
|
||||
|
||||
async def stream_tts(text_content: str, speed: float, voice: str, voice_file) -> StreamingResponse:
    """Stream locally synthesized speech segment by segment.

    Each text segment is synthesized by ``generate_tts`` into a temporary
    file, sent in 1 KiB chunks, and then deleted.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav``.
    """
    voice_file_path = await get_voice_file_path(voice, voice_file)
    segments = split_text(text_content)

    async def audio_stream_generator():
        for segment in segments:
            segment_file = await generate_tts(segment, speed, voice_file_path)
            try:
                with open(segment_file, 'rb') as f:
                    while chunk := f.read(1024):
                        yield chunk
            finally:
                # Runs on normal completion and when the generator is closed
                # early (e.g. the client disconnects), so no file is leaked.
                os.remove(segment_file)

    return StreamingResponse(audio_stream_generator(), media_type='audio/wav')
|
||||
|
||||
|
||||
|
||||
async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
    """Synthesize ``text`` into a new temporary WAV file and return its path.

    The caller is responsible for deleting the returned file. If synthesis
    fails the temporary file is removed and the exception propagates.
    """
    # mkstemp() creates the file atomically; mktemp() only returns a name and
    # is documented as unsafe because another process can claim it first.
    fd, output_path = tempfile.mkstemp(suffix=".wav", dir=tempfile.gettempdir())
    os.close(fd)

    try:
        XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
        # Run the blocking synthesis in a worker thread, as local_tts does,
        # so the event loop stays responsive.
        await asyncio.to_thread(XTTS.tts_to_file, text=text, speed=speed, file_path=output_path, speaker_wav=[voice_file_path], language="en")
    except Exception:
        os.remove(output_path)
        raise

    return output_path
|
||||
|
||||
|
||||
async def get_audio_stream(model: str, input_text: str, voice: str):
    """Request speech from ElevenLabs and return an iterator of audio chunks.

    Args:
        model: Accepted for interface compatibility.
            NOTE(review): the request always sends "eleven_turbo_v2" and
            ignores this value -- confirm whether that is intended.
        input_text: Text to speak.
        voice: Voice name or ID, resolved through ``determine_voice_id``.

    Returns:
        Iterator over 1024-byte chunks of the response body.

    Raises:
        HTTPException: with the upstream status code on a non-200 answer.
    """
    voice_id = await determine_voice_id(voice)
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    payload = {
        "text": input_text,
        "model_id": "eleven_turbo_v2"
    }
    headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}

    # requests is synchronous; run it in a worker thread so the event loop is
    # not blocked for the duration of the HTTP call.
    response = await asyncio.to_thread(requests.post, url, json=payload, headers=headers)

    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
    return response.iter_content(1024)
|
||||
|
||||
|
||||
|
||||
|
||||
def split_text(text, target_length=35, max_length=50):
    """Split text into TTS-sized segments along sentence boundaries.

    The text is cleaned with ``clean_text_for_tts`` and split after
    ``. ! ? "``. Sentences are accumulated until adding the next one would
    push the segment past ``max_length`` words. A single sentence longer than
    ``max_length`` is kept whole.

    Args:
        text: Raw input text; ``None`` or empty yields an empty list.
        target_length: Currently unused; kept for interface compatibility.
        max_length: Maximum number of words per segment.

    Returns:
        List of non-empty segment strings.
    """
    text = clean_text_for_tts(text)
    if not text:
        return []

    sentences = re.split(r'(?<=[.!?"])\s+', text)
    segments = []
    current_words = []

    for sentence in sentences:
        sentence_words = sentence.split()
        # Only flush when something has been accumulated; otherwise an
        # over-long first sentence would emit an empty segment.
        if current_words and len(current_words) + len(sentence_words) > max_length:
            finished = ' '.join(current_words)
            segments.append(finished)
            DEBUG(f"split_text - segment: {finished}, word count: {len(current_words)}")
            current_words = list(sentence_words)
        else:
            current_words.extend(sentence_words)

    if current_words:
        finished = ' '.join(current_words)
        segments.append(finished)
        DEBUG(f"split_text - segment: {finished}, word count: {len(current_words)}")

    return segments
|
||||
|
||||
|
||||
def clean_text_for_tts(text: str) -> str:
    """Normalize text before speech synthesis.

    Line breaks become spaces, every character other than word characters,
    whitespace and ``. , ; : ! ? ' "`` is removed, and whitespace runs are
    collapsed.

    Returns:
        The cleaned text, or an empty string when ``text`` is ``None`` (the
        function is annotated ``-> str`` and callers apply string operations
        to the result).
    """
    if text is None:
        DEBUG(f"No text received.")
        return ""

    text = text.replace("\n", " ").replace("\r", " ")
    text = re.sub(r"[^\w\s.,;:!?'\"]", '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text
|
||||
|
||||
|
||||
|
||||
def copy_to_podcast_dir(file_path):
    """Copy ``file_path`` into ``PODCAST_DIR`` under the same file name.

    Best effort: every failure is reported on stdout and swallowed, so the
    function always returns ``None``.
    """
    try:
        target = PODCAST_DIR / Path(file_path).name
        shutil.copy(file_path, target)
        print(f"File copied successfully to {target}")
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except shutil.SameFileError:
        print(f"Source and destination are the same file: {file_path}")
    except PermissionError:
        print(f"Permission denied while copying the file: {file_path}")
    except Exception as e:
        print(f"An error occurred while copying the file: {file_path}")
        print(f"Error details: {str(e)}")
|
265
sijapi/routers/weather.py
Normal file
265
sijapi/routers/weather.py
Normal file
|
@ -0,0 +1,265 @@
|
|||
import asyncio
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi import HTTPException
|
||||
from asyncpg.cursor import Cursor
|
||||
from httpx import AsyncClient
|
||||
from typing import Dict
|
||||
from datetime import datetime
|
||||
from shapely.wkb import loads
|
||||
from binascii import unhexlify
|
||||
from sijapi.utilities import localize_dt
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import VISUALCROSSING_API_KEY, TZ
|
||||
from sijapi.utilities import get_db_connection, haversine
|
||||
from sijapi.routers import locate
|
||||
|
||||
# FastAPI router object for this module.
weather = APIRouter()
|
||||
|
||||
|
||||
async def get_weather(date_time: datetime, latitude: float, longitude: float):
    """Return weather for a date and location, using the database as a cache.

    The stored record is reused when the requested time is not in the future,
    the record was updated after the requested time, it lies within 8 (per
    ``haversine``) of the requested point, and it has hourly rows. Otherwise
    the day is fetched from Visual Crossing, stored, and read back.

    Returns:
        The dict produced by ``get_weather_from_db``, or whatever was read
        before a failed refresh (possibly ``None``).
    """
    DEBUG(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}")
    daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)

    cache_is_usable = False
    if daily_weather_data:
        try:
            DEBUG(f"Daily weather data from db: {daily_weather_data}")
            daily_row = daily_weather_data['DailyWeather']
            last_updated = localize_dt(str(daily_row.get('last_updated')))

            # The stored location is hex-encoded WKB; decode it to a point.
            stored_loc = loads(unhexlify(daily_row.get('location')))
            stored_lat = stored_loc.y
            stored_lon = stored_loc.x
            stored_ele = stored_loc.z  # read as before; the value is not used

            hourly_weather = daily_weather_data.get('HourlyWeather')
            DEBUG(f"Hourly: {hourly_weather}")
            DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n")

            request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
            DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")

            if (
                last_updated
                and (date_time <= datetime.now(TZ) and last_updated > date_time and request_haversine < 8)
                and hourly_weather
                and len(hourly_weather) > 0
            ):
                DEBUG(f"We can use existing data... :')")
                cache_is_usable = True

        except Exception as e:
            # Any problem judging the cached record simply forces a refresh.
            ERR(f"Error in get_weather: {e}")

    if cache_is_usable:
        return daily_weather_data

    DEBUG(f"We require new data!")
    request_date_str = date_time.strftime("%Y-%m-%d")
    url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
    try:
        async with AsyncClient() as client:
            response = await client.get(url)
            if response.status_code != 200:
                ERR(f"Failed to fetch weather data: {response.status_code}, {response.text}")
            else:
                DEBUG(f"Successfully obtained data from VC...")
                try:
                    weather_data = response.json()
                    store_result = await store_weather_to_db(date_time, weather_data)
                    if store_result == "SUCCESS":
                        DEBUG(f"New weather data for {request_date_str} stored in database...")
                    else:
                        ERR(f"Failed to store weather data for {request_date_str} in database! {store_result}")

                    DEBUG(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}")
                    daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
                    if daily_weather_data is not None:
                        return daily_weather_data
                    raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
                except Exception as e:
                    ERR(f"Problem parsing VC response or storing data: {e}")
                    raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
    except Exception as e:
        # NOTE: this handler also catches the HTTPExceptions raised above, so
        # they are logged here and never reach the caller.
        ERR(f"Exception during API call: {e}")

    return daily_weather_data
|
||||
|
||||
|
||||
async def store_weather_to_db(date_time: datetime, weather_data: dict):
    """Insert one day of Visual Crossing data (daily row plus hourly rows).

    Args:
        date_time: Requested moment; its date is combined with each hourly
            "HH:MM:SS" value to build the hourly timestamps.
        weather_data: Parsed API response; ``days[0]`` is stored.

    Returns:
        "SUCCESS" once the daily row is inserted (individual hourly failures
        are logged and skipped), or ``None`` when preparing or inserting the
        daily row fails.
    """
    conn = await get_db_connection()
    try:
        try:
            day_data = weather_data.get('days')[0]
            DEBUG(f"day_data.get('sunrise'): {day_data.get('sunrise')}")

            # Handle preciptype and stations as PostgreSQL arrays
            preciptype_array = day_data.get('preciptype', []) or []
            stations_array = day_data.get('stations', []) or []

            date_str = date_time.strftime("%Y-%m-%d")

            # Get location details from weather data if available
            longitude = weather_data.get('longitude')
            latitude = weather_data.get('latitude')
            # NOTE(review): assumes locate.get_elevation is synchronous and
            # returns a number -- confirm against sijapi.routers.locate.
            elevation = locate.get_elevation(latitude, longitude)
            # Compare against None: 0 is a valid longitude, latitude and
            # elevation and must not suppress the stored location.
            has_location = longitude is not None and latitude is not None and elevation is not None
            location_point = f"POINTZ({longitude} {latitude} {elevation})" if has_location else None

            # Turn the date string and the "HH:MM..." sunrise/sunset strings
            # into datetime objects.
            day_data['datetime'] = localize_dt(day_data.get('datetime'))
            sunrise_parts = day_data.get('sunrise').split(':')
            sunset_parts = day_data.get('sunset').split(':')
            day_data['sunrise'] = day_data['datetime'].replace(hour=int(sunrise_parts[0]), minute=int(sunrise_parts[1]))
            day_data['sunset'] = day_data['datetime'].replace(hour=int(sunset_parts[0]), minute=int(sunset_parts[1]))

            daily_weather_params = (
                day_data.get('sunrise'), day_data.get('sunriseEpoch'),
                day_data.get('sunset'), day_data.get('sunsetEpoch'),
                day_data.get('description'), day_data.get('tempmax'),
                day_data.get('tempmin'), day_data.get('uvindex'),
                day_data.get('winddir'), day_data.get('windspeed'),
                day_data.get('icon'), datetime.now(),
                day_data.get('datetime'), day_data.get('datetimeEpoch'),
                day_data.get('temp'), day_data.get('feelslikemax'),
                day_data.get('feelslikemin'), day_data.get('feelslike'),
                day_data.get('dew'), day_data.get('humidity'),
                day_data.get('precip'), day_data.get('precipprob'),
                day_data.get('precipcover'), preciptype_array,
                day_data.get('snow'), day_data.get('snowdepth'),
                day_data.get('windgust'), day_data.get('pressure'),
                day_data.get('cloudcover'), day_data.get('visibility'),
                day_data.get('solarradiation'), day_data.get('solarenergy'),
                day_data.get('severerisk', 0), day_data.get('moonphase'),
                day_data.get('conditions'), stations_array, day_data.get('source'),
                location_point
            )
        except Exception as e:
            ERR(f"Failed to prepare database query in store_weather_to_db! {e}")
            # Nothing usable was built; stop here instead of failing later
            # on undefined names.
            return None

        try:
            daily_weather_query = '''
                INSERT INTO DailyWeather (
                    sunrise, sunriseEpoch, sunset, sunsetEpoch, description,
                    tempmax, tempmin, uvindex, winddir, windspeed, icon, last_updated,
                    datetime, datetimeEpoch, temp, feelslikemax, feelslikemin, feelslike,
                    dew, humidity, precip, precipprob, precipcover, preciptype,
                    snow, snowdepth, windgust, pressure, cloudcover, visibility,
                    solarradiation, solarenergy, severerisk, moonphase, conditions,
                    stations, source, location
                ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38)
                RETURNING id
            '''
            hourly_weather_query = '''
                INSERT INTO HourlyWeather (daily_weather_id, datetime, datetimeEpoch, temp, feelslike, humidity, dew, precip, precipprob,
                preciptype, snow, snowdepth, windgust, windspeed, winddir, pressure, cloudcover, visibility, solarradiation, solarenergy,
                uvindex, severerisk, conditions, icon, stations, source)
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26)
                RETURNING id
            '''

            # Insert the daily row and keep its id for the hourly rows.
            async with conn.transaction():
                daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params)

            for hour_data in day_data.get('hours', []):
                try:
                    # Short pause between hourly inserts, kept from the
                    # original implementation.
                    await asyncio.sleep(0.1)
                    hour_timestamp = date_str + ' ' + hour_data['datetime']
                    hour_data['datetime'] = localize_dt(hour_timestamp)
                    DEBUG(f"Processing hours now...")

                    hour_preciptype_array = hour_data.get('preciptype', []) or []
                    hour_stations_array = hour_data.get('stations', []) or []
                    hourly_weather_params = (
                        daily_weather_id,
                        hour_data['datetime'],
                        hour_data.get('datetimeEpoch'),
                        hour_data['temp'],
                        hour_data['feelslike'],
                        hour_data['humidity'],
                        hour_data['dew'],
                        hour_data['precip'],
                        hour_data['precipprob'],
                        hour_preciptype_array,
                        hour_data['snow'],
                        hour_data['snowdepth'],
                        hour_data['windgust'],
                        hour_data['windspeed'],
                        hour_data['winddir'],
                        hour_data['pressure'],
                        hour_data['cloudcover'],
                        hour_data['visibility'],
                        hour_data['solarradiation'],
                        hour_data['solarenergy'],
                        hour_data['uvindex'],
                        hour_data.get('severerisk', 0),
                        hour_data['conditions'],
                        hour_data['icon'],
                        hour_stations_array,
                        hour_data.get('source', ''),
                    )

                    async with conn.transaction():
                        await conn.fetchval(hourly_weather_query, *hourly_weather_params)
                except Exception as e:
                    # One bad hour must not abort the remaining hours.
                    ERR(f"EXCEPTION: {e}")

            return "SUCCESS"

        except Exception as e:
            ERR(f"Error in dailyweather storage: {e}")
            return None
    finally:
        # Each call opens its own connection; always release it.
        await conn.close()
|
||||
|
||||
|
||||
|
||||
async def get_weather_from_db(date_time: datetime, latitude: float, longitude: float):
    """Load the stored weather record nearest to a point for a given date.

    Only daily rows for ``date_time``'s date within 8046.72 of the point (as
    measured by ``ST_DWithin`` on geography values) are considered; the
    closest one wins.

    Returns:
        ``{'DailyWeather': {...}, 'HourlyWeather': [{...}, ...]}``, or
        ``None`` when no row matches or the query fails.
    """
    conn = await get_db_connection()

    query_date = date_time.date()
    try:
        # Query to get daily weather data
        query = '''
            SELECT DW.* FROM DailyWeather DW
            WHERE DW.datetime::date = $1
            AND ST_DWithin(DW.location::geography, ST_MakePoint($2,$3)::geography, 8046.72)
            ORDER BY ST_Distance(DW.location, ST_MakePoint($4, $5)::geography) ASC
            LIMIT 1
        '''

        daily_weather_data = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude)

        if daily_weather_data is None:
            DEBUG(f"No daily weather data retrieved from database.")
            return None

        # Query to get hourly weather data
        query = '''
            SELECT HW.* FROM HourlyWeather HW
            WHERE HW.daily_weather_id = $1
        '''
        hourly_weather_data = await conn.fetch(query, daily_weather_data['id'])

        day: Dict = {
            'DailyWeather': dict(daily_weather_data),
            'HourlyWeather': [dict(row) for row in hourly_weather_data],
        }
        return day
    except Exception as e:
        ERR(f"Unexpected error occurred: {e}")
        return None
    finally:
        # Each call opens its own connection; always release it.
        await conn.close()
|
||||
|
||||
|
427
sijapi/utilities.py
Normal file
427
sijapi/utilities.py
Normal file
|
@ -0,0 +1,427 @@
|
|||
import re
|
||||
import os
|
||||
from fastapi import Form
|
||||
import re
|
||||
import io
|
||||
from io import BytesIO
|
||||
import base64
|
||||
import math
|
||||
from dateutil import parser
|
||||
from pathlib import Path
|
||||
import filetype
|
||||
from PyPDF2 import PdfReader
|
||||
from pdfminer.high_level import extract_text as pdfminer_extract_text
|
||||
import pytesseract
|
||||
from pdf2image import convert_from_path
|
||||
from datetime import datetime, date, time
|
||||
from typing import Optional, Union, Tuple
|
||||
import asyncio
|
||||
from PIL import Image
|
||||
from dateutil.parser import parse as dateutil_parse
|
||||
from docx import Document
|
||||
import asyncpg
|
||||
from sshtunnel import SSHTunnelForwarder
|
||||
from fastapi import Depends, HTTPException, Request, UploadFile
|
||||
from fastapi.security.api_key import APIKeyHeader
|
||||
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
|
||||
from sijapi import DB, GLOBAL_API_KEY, DB, DB_HOST, DB_PORT, DB_USER, DB_PASS, TZ, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR
|
||||
|
||||
# Security scheme that takes the API key from the "Authorization" request header.
api_key_header = APIKeyHeader(name="Authorization")
|
||||
|
||||
def validate_api_key(request: Request, api_key: str = Depends(api_key_header)):
    """Reject requests that do not carry the global API key.

    The key may be supplied in the ``Authorization`` header (optionally
    prefixed with "Bearer ") or in the ``api_key`` query parameter. The paths
    ``/health``, ``/ip`` and ``/pgp`` are exempt.

    Raises:
        HTTPException: 401 when neither source matches ``GLOBAL_API_KEY``.
    """
    if request.url.path in ("/health", "/ip", "/pgp"):
        return

    api_key_query = request.query_params.get("api_key")

    if api_key:
        api_key = api_key.strip()
        # Match the scheme case-insensitively but keep the key itself
        # untouched; lowercasing it would break mixed-case keys.
        if api_key.lower().startswith("bearer "):
            api_key = api_key[len("bearer "):].strip()

    if api_key != GLOBAL_API_KEY and api_key_query != GLOBAL_API_KEY:
        raise HTTPException(status_code=401, detail="Invalid or missing API key")
|
||||
|
||||
|
||||
def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str = None, extension: str = None, no_timestamp: bool = False) -> Tuple[Path, Path]:
    '''
    Obsidian helper. Builds a path under journal/<year>/<month>/<day> for the given datetime.

    With no subdir, filename or extension, the path is the daily note "<day>.md".
    Otherwise an optional "<day_short> <subdir>" directory is appended, followed by an optional
    file name prefixed with the short date (and the HHMMSS timestamp unless no_timestamp is set),
    with the extension appended when the name does not already end in it.

    Returns (absolute_path, relative_path); the absolute path's parent directory is created.
    '''
    year = date_time.strftime(YEAR_FMT)
    month = date_time.strftime(MONTH_FMT)
    day = date_time.strftime(DAY_FMT)
    day_short = date_time.strftime(DAY_SHORT_FMT)
    timestamp = date_time.strftime("%H%M%S")

    relative_path = Path("journal") / year / month / day

    if subdir or filename or extension:
        if subdir:
            # datestamped subdirectory handler
            relative_path = relative_path / f"{day_short} {subdir}"

        if filename:
            name_prefix = day_short if no_timestamp else f"{day_short} {timestamp}"
            filename = f"{name_prefix} {sanitize_filename(filename)}"

            if extension:
                if not extension.startswith("."):
                    extension = f".{extension}"
                if not filename.endswith(extension):
                    filename = f"{filename}{extension}"

            relative_path = relative_path / filename
    else:
        # standard daily note handler, where only the date_time was specified:
        relative_path = relative_path / f"{day}.md"

    absolute_path = OBSIDIAN_VAULT_DIR / relative_path
    os.makedirs(absolute_path.parent, exist_ok=True)
    return absolute_path, relative_path
|
||||
|
||||
def prefix_lines(text: str, prefix: str = '> ') -> str:
    """Put ``prefix`` in front of every line of ``text``, dropping each line's leading whitespace."""
    return '\n'.join(prefix + raw_line.lstrip() for raw_line in text.split('\n'))
|
||||
|
||||
def f(file):
    """Return a readable binary source for ``file``.

    File-like objects (anything with a callable ``read``) and ``bytes`` /
    ``bytearray`` values are returned unchanged. A ``str`` or ``Path`` is
    opened in binary mode and the open handle is returned; the caller owns
    that handle and must close it.

    Raises:
        TypeError: for any other input type.
    """
    if hasattr(file, 'read') and callable(file.read):
        return file
    if isinstance(file, (bytes, bytearray)):
        return file

    if isinstance(file, (str, Path)):
        # Deliberately not a `with` block: returning from inside one closes
        # the file before the caller can read from it.
        return open(Path(file), 'rb')

    raise TypeError("Invalid file type. Expected str, Path, or file-like object.")
|
||||
|
||||
|
||||
def get_extension(file):
    """Return the suffix (including the dot) of a path or upload-like object.

    Accepts a ``str``, a ``Path``, or any object with a ``filename``
    attribute. Failures are logged and re-raised.
    """
    try:
        source = file if isinstance(file, (str, Path)) else file.filename
        return Path(source).suffix
    except Exception as e:
        ERR(f"Unable to get extension of {file}")
        raise e
|
||||
|
||||
|
||||
|
||||
def sanitize_filename(text, max_length=255):
    """Reduce ``text`` to word characters, whitespace, dots and hyphens, truncated to ``max_length``."""
    DEBUG(f"Filename before sanitization: {text}")
    allowed_only = re.sub(r'[^\w\s\.-]', '', text)
    final_filename = allowed_only.strip()[:max_length]
    DEBUG(f"Filename after sanitization: {final_filename}")
    return final_filename
|
||||
|
||||
def bool_convert(value: str = Form(None)):
    """Interpret a form field as a boolean.

    "true", "1", "t", "y" and "yes" (case-insensitive) are true; anything
    else, including a missing field (``None``), is false.
    """
    if value is None:
        # The form default is None; a missing field means "false", not a crash.
        return False
    return value.lower() in ["true", "1", "t", "y", "yes"]
|
||||
|
||||
|
||||
def str_to_bool(value: str) -> bool:
    """
    Convert a string to a boolean.
    Interprets 'true', '1', 'yes', 'y' as True.
    Interprets 'false', '0', 'no', 'n', '', or any other string as False.
    Matching ignores case and surrounding whitespace; None is False.
    """
    if value is None:
        return False
    return str(value).strip().lower() in ("true", "1", "yes", "y")
|
||||
|
||||
def get_timestamp():
    """Return the current local time formatted as "YYYY-MM-DD HH:MM:SS"."""
    current = datetime.now()
    return f"{current:%Y-%m-%d %H:%M:%S}"
|
||||
|
||||
|
||||
async def extract_text(file_path: str) -> str:
    """Extract text from a ``.pdf`` or ``.docx`` file.

    The extension is matched case-insensitively and ``Path`` objects are
    accepted as well as strings.

    Returns:
        The extracted text, or ``None`` for any other extension.
    """
    path_str = str(file_path)
    lowered = path_str.lower()

    if lowered.endswith('.pdf'):
        return await extract_text_from_pdf(path_str)

    if lowered.endswith('.docx'):
        return await extract_text_from_docx(path_str)

    # Unsupported type: no extractor is defined for it.
    return None
|
||||
|
||||
|
||||
def clean_text(text):
    """Tidy extracted text.

    Removes every hyphen, collapses whitespace runs to single spaces, deletes
    zero-width characters (U+200B-U+200D, U+FEFF) and strips the ends.
    """
    without_hyphens = text.replace('-', '')
    single_spaced = re.sub(r'\s+', ' ', without_hyphens)
    visible_only = re.sub(r'[\u200B-\u200D\uFEFF]', '', single_spaced)
    return visible_only.strip()
|
||||
|
||||
|
||||
async def ocr_pdf(file_path: str) -> str:
    """OCR every page of a PDF and return the page texts joined by spaces.

    Page rendering and recognition run in worker threads. Any failure is
    logged and yields an empty string.
    """
    try:
        page_images = await asyncio.to_thread(convert_from_path, file_path)
        ocr_jobs = [asyncio.to_thread(pytesseract.image_to_string, page_image) for page_image in page_images]
        page_texts = await asyncio.gather(*ocr_jobs)
        return ' '.join(page_texts)
    except Exception as e:
        ERR(f"Error during OCR: {str(e)}")
        return ""
|
||||
|
||||
|
||||
async def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from a PDF, trying PyPDF2, then pdfminer.six, then OCR.

    A result is accepted only when ``should_use_ocr`` judges it sufficient
    for the page count reported by PyPDF2. Invalid PDFs yield "".

    NOTE: a second function with this same name is defined later in this
    module; at import time that later definition replaces this one.
    """
    if not await is_valid_pdf(file_path):
        ERR(f"Invalid PDF file: {file_path}")
        return ""

    text = ''
    num_pages = 0

    # First, attempt to extract text using PyPDF2
    try:
        reader = await asyncio.to_thread(PdfReader, file_path)
        for page in reader.pages:
            # Extract once per page; the call is expensive.
            page_text = page.extract_text()
            if page_text:
                text += page_text + ' '
        num_pages = len(reader.pages)

        # If text was extracted successfully and it's deemed sufficient, return it
        if text and not should_use_ocr(text, num_pages):
            return clean_text(text)
    except Exception as e:
        ERR(f"Error extracting text with PyPDF2: {str(e)}")

    # If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six
    try:
        text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
        if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages):
            return clean_text(text_pdfminer)
    except Exception as e:
        ERR(f"Error extracting text with pdfminer.six: {e}")

    # If both methods fail or are deemed insufficient, use OCR as the last resort
    INFO("Falling back to OCR for text extraction...")
    return await ocr_pdf(file_path)
|
||||
|
||||
async def is_valid_pdf(file_path: str) -> bool:
    """Check if the file at file_path is a valid PDF."""
    try:
        kind = filetype.guess(file_path)
    except Exception as e:
        ERR(f"Error checking file type: {e}")
        return False

    # An unrecognized type gives no match object; that is simply "not a PDF",
    # not an error worth logging.
    if kind is None:
        return False
    return kind.mime == 'application/pdf'
|
||||
|
||||
async def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from a PDF, trying PyPDF2, then pdfminer.six, then OCR.

    The first method that yields non-blank text wins. Invalid PDFs and a
    failed OCR pass yield "".
    """
    if not await is_valid_pdf(file_path):
        ERR(f"Invalid PDF file: {file_path}")
        return ""

    text = ''
    try:
        reader = await asyncio.to_thread(PdfReader, file_path)
        for page in reader.pages:
            # Extract once per page; the call is expensive.
            page_text = page.extract_text()
            if page_text:
                text += page_text + ' '
        if text.strip():  # Successfully extracted text
            return clean_text(text)
    except Exception as e:
        ERR(f"Error extracting text with PyPDF2: {str(e)}")

    try:
        text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
        if text_pdfminer.strip():  # Successfully extracted text
            return clean_text(text_pdfminer)
    except Exception as e:
        ERR(f"Error extracting text with pdfminer.six: {str(e)}")

    # Fall back to OCR
    INFO("Falling back to OCR for text extraction...")
    try:
        # Rendering pages is blocking work; run it in a worker thread like
        # the OCR calls below so the event loop is not stalled.
        images = await asyncio.to_thread(convert_from_path, file_path)
        ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images))
        return ' '.join(ocr_texts).strip()
    except Exception as e:
        ERR(f"OCR failed: {str(e)}")
        return ""
|
||||
|
||||
async def extract_text_from_docx(file_path: str) -> str:
    """Return the paragraph texts of a .docx file joined by newlines.

    The document is loaded and read in a worker thread.
    """
    def _join_paragraphs(docx_path):
        document = Document(docx_path)
        return '\n'.join(para.text for para in document.paragraphs)

    return await asyncio.to_thread(_join_paragraphs, file_path)
|
||||
|
||||
# Correcting read_text_file to be asynchronous
|
||||
async def read_text_file(file_path: str) -> str:
    """Read a UTF-8 text file without blocking the event loop.

    The actual read is delegated to ``_sync_read_text_file`` in a worker thread.
    """
    contents = await asyncio.to_thread(_sync_read_text_file, file_path)
    return contents
|
||||
|
||||
def _sync_read_text_file(file_path: str) -> str:
|
||||
# Actual synchronous file reading operation
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
return file.read()
|
||||
|
||||
def should_use_ocr(text, num_pages) -> bool:
    """Decide whether extracted text is too sparse and OCR should be used.

    Returns True when no text was extracted or when the text averages fewer
    than 10 words per page. An unknown page count (zero, negative or None) is
    treated as a single page rather than dividing by zero.
    """
    if not text:
        return True  # No text was extracted, use OCR

    word_count = len(text.split())
    pages = num_pages if num_pages and num_pages > 0 else 1
    avg_words_per_page = word_count / pages
    return avg_words_per_page < 10
|
||||
|
||||
|
||||
def convert_to_unix_time(iso_date_str):
    """Parse a date/time string with dateutil and return whole seconds since the Unix epoch."""
    parsed = parser.parse(iso_date_str)  # honors a timezone offset when the string carries one
    epoch_seconds = parsed.timestamp()
    return int(epoch_seconds)
|
||||
|
||||
|
||||
async def get_db_connection():
    """Open and return a new asyncpg connection using the configured DB settings."""
    connect_settings = {
        "database": DB,
        "user": DB_USER,
        "password": DB_PASS,
        "host": DB_HOST,
        "port": DB_PORT,
    }
    return await asyncpg.connect(**connect_settings)
|
||||
|
||||
temp = """
|
||||
def get_db_connection_ssh(ssh: bool = True):
|
||||
if ssh:
|
||||
with SSHTunnelForwarder(
|
||||
(DB_SSH, 22),
|
||||
DB_SSH_USER=DB_SSH_USER,
|
||||
DB_SSH_PASS=DB_SSH_PASS,
|
||||
remote_bind_address=DB_SSH,
|
||||
local_bind_address=(DB_HOST, DB_PORT)
|
||||
) as tunnel: conn = psycopg2.connect(
|
||||
dbname=DB,
|
||||
user=DB_USER,
|
||||
password=DB_PASS,
|
||||
host=DB_HOST,
|
||||
port=DB_PORT
|
||||
)
|
||||
else:
|
||||
conn = psycopg2.connect(
|
||||
dbname=DB,
|
||||
user=DB_USER,
|
||||
password=DB_PASS,
|
||||
host=DB_HOST,
|
||||
port=DB_PORT
|
||||
)
|
||||
|
||||
return conn
|
||||
"""
|
||||
|
||||
def db_localized():
    """Return the awaitable produced by ``get_db_connection()``.

    This function is not a coroutine itself; the caller must await the
    returned object to obtain the connection.
    """
    pending_connection = get_db_connection()
    return pending_connection
|
||||
|
||||
|
||||
def haversine(lat1, lon1, lat2, lon2):
    """ Calculate the great circle distance between two points on the earth specified in decimal degrees.

    Returns the distance in kilometers, using an Earth radius of 6371 km.
    """
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make asin() raise ValueError; clamp it first.
    a = min(1.0, max(0.0, a))
    c = 2 * math.asin(math.sqrt(a))
    r = 6371  # Radius of Earth in kilometers
    return c * r
|
||||
|
||||
|
||||
|
||||
def convert_degrees_to_cardinal(d):
    """
    Convert a bearing in degrees to one of the 16 compass points ("N", "NNE", ...).
    """
    compass_points = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"]
    point_count = len(compass_points)
    sector_index = round(d / (360. / point_count))
    # Wrap around so values at or beyond the last sector map back to "N".
    return compass_points[sector_index % point_count]
|
||||
|
||||
|
||||
def localize_dt(dt):
    """Return ``dt`` as a timezone-aware datetime.

    Args:
        dt: A datetime, or a string that ``dateutil_parse`` can turn into one.

    Returns:
        The datetime unchanged if it already carries tzinfo; otherwise the
        same wall-clock time with ``tzinfo`` set to the module-level ``TZ``.

    Raises:
        TypeError: If a string cannot be parsed, or the input is neither a
            string nor a datetime.
    """
    initial_dt = dt

    if isinstance(dt, str):
        # Only the parse itself is guarded, so failures further down are not
        # caught and re-labelled by this function's own handler.
        try:
            dt = dateutil_parse(dt)
        except Exception as e:
            ERR(f"Error parsing datetime: {e}")
            raise TypeError("Input must be a string or datetime object") from e
        DEBUG(f"{initial_dt} was a string so we attempted converting to datetime. Result: {dt}")

    if not isinstance(dt, datetime):
        ERR("Conversion failed")
        raise TypeError("Input must be a string or datetime object")

    DEBUG(f"{dt} is a datetime object, so we will ensure it is tz-aware.")
    if dt.tzinfo is None:
        # NOTE(review): replace() attaches TZ without shifting the clock time;
        # this assumes TZ is a tzinfo that is safe to attach this way
        # (zoneinfo / dateutil style rather than pytz) - confirm.
        dt = dt.replace(tzinfo=TZ)
    return dt
|
||||
|
||||
|
||||
# Two-hourly 12-hour-clock labels mapped to the 24-hour "HH:MM:SS" string each
# one stands for.
HOURLY_COLUMNS_MAPPING = {
    "12am": "00:00:00",
    "2am": "02:00:00",
    "4am": "04:00:00",
    "6am": "06:00:00",
    "8am": "08:00:00",
    "10am": "10:00:00",
    "12pm": "12:00:00",
    "2pm": "14:00:00",
    "4pm": "16:00:00",
    "6pm": "18:00:00",
    "8pm": "20:00:00",
    "10pm": "22:00:00",
}


def convert_to_12_hour_format(datetime_obj_or_str):
    """Translate a time into its label from ``HOURLY_COLUMNS_MAPPING``.

    Args:
        datetime_obj_or_str: A ``"%Y-%m-%d %H:%M:%S"`` or ``"%H:%M:%S"``
            string, or an object with ``strftime`` (datetime / time).

    Returns:
        The matching label (e.g. ``"2pm"``), ``"Invalid datetime string
        format"`` when a string matches neither format, or ``"Invalid time"``
        when the time of day is not one of the mapped values.
    """
    if isinstance(datetime_obj_or_str, str):
        for fmt in ("%Y-%m-%d %H:%M:%S", "%H:%M:%S"):
            try:
                datetime_obj = datetime.strptime(datetime_obj_or_str, fmt)
            except ValueError:
                continue
            break
        else:
            return "Invalid datetime string format"
    else:
        datetime_obj = datetime_obj_or_str

    # Always derive the lookup key from the parsed object: using the raw
    # string made full "YYYY-MM-DD HH:MM:SS" inputs miss the mapping.
    time24 = datetime_obj.strftime("%H:%M:%S")

    reverse_mapping = {v: k for k, v in HOURLY_COLUMNS_MAPPING.items()}
    return reverse_mapping.get(time24, "Invalid time")
|
||||
|
||||
|
||||
def encode_image_to_base64(image_path):
    """Re-encode the image at ``image_path`` as JPEG and return it as base64 text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The base64-encoded JPEG bytes as a UTF-8 string, or ``None`` (after
        logging via ``DEBUG``) when no file exists at ``image_path``.
    """
    if not os.path.exists(image_path):
        DEBUG(f"Error: File does not exist at {image_path}")
        return None

    with Image.open(image_path) as image:
        jpeg_ready = image
        # The JPEG writer rejects modes such as RGBA, LA and P (typical for
        # PNG/GIF sources); convert those to RGB, which drops any alpha channel.
        if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            jpeg_ready = image.convert("RGB")
        output_buffer = BytesIO()
        jpeg_ready.save(output_buffer, format='JPEG')

    return base64.b64encode(output_buffer.getvalue()).decode('utf-8')
|
||||
|
||||
def resize_and_convert_image(image_path, max_size=2160, quality=80):
    """Scale an image so its longest side equals ``max_size`` and return JPEG bytes.

    Args:
        image_path: Path of the image file to read.
        max_size: Target length, in pixels, of the longest side.
        quality: JPEG quality setting passed to the encoder.

    Returns:
        The resized image encoded as JPEG, as ``bytes``.
    """
    with Image.open(image_path) as img:
        prepared = img
        # The JPEG writer rejects modes such as RGBA, LA and P; convert those
        # to RGB up front (this drops any alpha channel).
        if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            prepared = img.convert("RGB")

        # NOTE(review): ratio exceeds 1 when the longest side is shorter than
        # max_size, so small images are scaled up - confirm that is intended.
        ratio = max_size / max(prepared.size)
        # int() truncation could yield 0 for an extremely narrow image; keep
        # every dimension at least one pixel so resize() does not fail.
        new_size = tuple(max(1, int(side * ratio)) for side in prepared.size)
        resized = prepared.resize(new_size, Image.Resampling.LANCZOS)

        img_byte_arr = io.BytesIO()
        resized.save(img_byte_arr, format='JPEG', quality=quality)

    return img_byte_arr.getvalue()
|
Loading…
Add table
Reference in a new issue