Auto-update: Tue Jun 25 16:59:10 PDT 2024

sanj 2024-06-25 16:59:10 -07:00
parent 0be98b0517
commit dc8743841d
12 changed files with 873 additions and 478 deletions


@@ -12,7 +12,7 @@ from typing import List, Optional
import traceback
import logging
from .logs import Logger
-from .classes import AutoResponder, IMAPConfig, SMTPConfig, EmailAccount, EmailContact, IncomingEmail
+from .classes import AutoResponder, IMAPConfig, SMTPConfig, EmailAccount, EmailContact, IncomingEmail, TimezoneTracker, Database
# from sijapi.config.config import load_config
# cfg = load_config()
@@ -43,6 +43,7 @@ os.makedirs(LOGS_DIR, exist_ok=True)
load_dotenv(ENV_PATH)
### API essentials
+DB = Database.from_env()
ROUTERS = os.getenv('ROUTERS', '').split(',')
PUBLIC_SERVICES = os.getenv('PUBLIC_SERVICES', '').split(',')
GLOBAL_API_KEY = os.getenv("GLOBAL_API_KEY")
@@ -68,29 +69,19 @@ os.makedirs(REQUESTS_DIR, exist_ok=True)
REQUESTS_LOG_PATH = LOGS_DIR / "requests.log"
-### Databases
-DB = os.getenv("DB", 'sijdb')
-DB_HOST = os.getenv("DB_HOST", "127.0.0.1")
-DB_PORT = os.getenv("DB_PORT", 5432)
-DB_USER = os.getenv("DB_USER", 'sij')
-DB_PASS = os.getenv("DB_PASS")
-DB_SSH = os.getenv("DB_SSH", "100.64.64.15")
-DB_SSH_USER = os.getenv("DB_SSH_USER")
-DB_SSH_PASS = os.getenv("DB_SSH_ENV")
-DB_URL = f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB}'
### LOCATE AND WEATHER LOCALIZATIONS
USER_FULLNAME = os.getenv('USER_FULLNAME')
USER_BIO = os.getenv('USER_BIO')
-TZ = tz.gettz(os.getenv("TZ", "America/Los_Angeles"))
HOME_ZIP = os.getenv("HOME_ZIP") # unimplemented
-LOCATION_OVERRIDES = DATA_DIR / "loc_overrides.json"
+NAMED_LOCATIONS = CONFIG_DIR / "named-locations.yaml"
-LOCATIONS_CSV = DATA_DIR / "US.csv"
# DB = DATA_DIR / "weatherlocate.db" # deprecated
VISUALCROSSING_BASE_URL = os.getenv("VISUALCROSSING_BASE_URL", "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline")
VISUALCROSSING_API_KEY = os.getenv("VISUALCROSSING_API_KEY")
+GEONAMES_TXT = DATA_DIR / "geonames.txt"
+LOCATIONS_CSV = DATA_DIR / "US.csv"
+TZ = tz.gettz(os.getenv("TZ", "America/Los_Angeles"))
+DynamicTZ = TimezoneTracker(DB)
### Obsidian & notes
ALLOWED_FILENAME_CHARS = r'[^\w \.-]'
@@ -131,7 +122,7 @@ COMFYUI_URL = os.getenv('COMFYUI_URL', "http://localhost:8188")
COMFYUI_DIR = Path(os.getenv('COMFYUI_DIR'))
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output'
COMFYUI_LAUNCH_CMD = os.getenv('COMFYUI_LAUNCH_CMD', 'mamba activate comfyui && python main.py')
-SD_CONFIG_PATH = CONFIG_DIR / 'sd.json'
+SD_CONFIG_PATH = CONFIG_DIR / 'sd.yaml'
### Summarization
SUMMARY_CHUNK_SIZE = int(os.getenv("SUMMARY_CHUNK_SIZE", 4000)) # measured in tokens
@@ -155,7 +146,7 @@ TTS_DIR = DATA_DIR / "tts"
os.makedirs(TTS_DIR, exist_ok=True)
VOICE_DIR = TTS_DIR / 'voices'
os.makedirs(VOICE_DIR, exist_ok=True)
-PODCAST_DIR = TTS_DIR / "sideloads"
+PODCAST_DIR = os.getenv("PODCAST_DIR", TTS_DIR / "sideloads")
os.makedirs(PODCAST_DIR, exist_ok=True)
TTS_OUTPUT_DIR = TTS_DIR / 'outputs'
os.makedirs(TTS_OUTPUT_DIR, exist_ok=True)
@@ -169,13 +160,7 @@ ICAL_TOGGLE = True if os.getenv("ICAL_TOGGLE") == "True" else False
ICS_PATH = DATA_DIR / 'calendar.ics' # deprecated now, but maybe revive?
ICALENDARS = os.getenv('ICALENDARS', 'NULL,VOID').split(',')
-def load_email_accounts(yaml_path: str) -> List[EmailAccount]:
-    with open(yaml_path, 'r') as file:
-        config = yaml.safe_load(file)
-    return [EmailAccount(**account) for account in config['accounts']]
EMAIL_CONFIG = CONFIG_DIR / "email.yaml"
-EMAIL_ACCOUNTS = load_email_accounts(EMAIL_CONFIG)
AUTORESPOND = True
### Courtlistener & other webhooks


@@ -1,6 +1,65 @@
from pydantic import BaseModel
-from typing import List, Optional, Any
+from typing import List, Optional, Any, Tuple, Dict, Union, Tuple
-from datetime import datetime
+from datetime import datetime, timedelta
import asyncio
import asyncpg
import json
from pydantic import BaseModel, Field
from typing import Optional
import asyncpg
import os
from pydantic import BaseModel, Field
from typing import Optional
from pydantic import BaseModel, Field
from typing import Optional
import asyncpg
from pydantic import BaseModel, Field
from typing import Optional
import asyncpg
from contextlib import asynccontextmanager
class Database(BaseModel):
    host: str = Field(..., description="Database host")
    port: int = Field(5432, description="Database port")
    user: str = Field(..., description="Database user")
    password: str = Field(..., description="Database password")
    database: str = Field(..., description="Database name")
    db_schema: Optional[str] = Field(None, description="Database schema")

    @asynccontextmanager
    async def get_connection(self):
        conn = await asyncpg.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            database=self.database
        )
        try:
            if self.db_schema:
                await conn.execute(f"SET search_path TO {self.db_schema}")
            yield conn
        finally:
            await conn.close()

    @classmethod
    def from_env(cls):
        import os
        return cls(
            host=os.getenv("DB_HOST", "localhost"),
            port=int(os.getenv("DB_PORT", 5432)),
            user=os.getenv("DB_USER"),
            password=os.getenv("DB_PASSWORD"),
            database=os.getenv("DB_NAME"),
            db_schema=os.getenv("DB_SCHEMA")
        )

    def to_dict(self):
        return self.dict(exclude_none=True)
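For orientation, a minimal usage sketch (not part of the commit) of how this connection manager is meant to be driven; the `SELECT 1` query is only a placeholder:

```python
import asyncio
from sijapi.classes import Database

async def main():
    # from_env() reads DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME, DB_SCHEMA
    db = Database.from_env()
    # get_connection() is an async context manager: it opens an asyncpg connection,
    # applies the optional schema search_path, and closes the connection on exit
    async with db.get_connection() as conn:
        row = await conn.fetchrow("SELECT 1 AS ok")  # placeholder query
        print(row["ok"])

asyncio.run(main())
```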
class AutoResponder(BaseModel):
    name: str
@@ -8,7 +67,7 @@ class AutoResponder(BaseModel):
    context: str
    whitelist: List[str]
    blacklist: List[str]
-    img_gen_prompt: Optional[str] = None
+    image_prompt: Optional[str] = None

class IMAPConfig(BaseModel):
    username: str
@@ -26,20 +85,131 @@ class SMTPConfig(BaseModel):
class EmailAccount(BaseModel):
    name: str
+    refresh: int
    fullname: Optional[str]
    bio: Optional[str]
+    summarize: bool = False
+    podcast: bool = False
    imap: IMAPConfig
    smtp: SMTPConfig
    autoresponders: Optional[List[AutoResponder]]

class EmailContact(BaseModel):
    email: str
-    name: str
+    name: Optional[str] = None

class IncomingEmail(BaseModel):
    sender: str
-    recipients: List[EmailContact]
    datetime_received: datetime
+    recipients: List[EmailContact]
    subject: str
    body: str
-    attachments: Optional[List[Any]] = None
+    attachments: List[dict] = []
class Location(BaseModel):
    latitude: float
    longitude: float
    datetime: datetime
    elevation: Optional[float] = None
    altitude: Optional[float] = None
    zip: Optional[str] = None
    street: Optional[str] = None
    city: Optional[str] = None
    state: Optional[str] = None
    country: Optional[str] = None
    context: Optional[Dict[str, Any]] = None
    class_: Optional[str] = None
    type: Optional[str] = None
    name: Optional[str] = None
    display_name: Optional[str] = None
    boundingbox: Optional[List[str]] = None
    amenity: Optional[str] = None
    house_number: Optional[str] = None
    road: Optional[str] = None
    quarter: Optional[str] = None
    neighbourhood: Optional[str] = None
    suburb: Optional[str] = None
    county: Optional[str] = None
    country_code: Optional[str] = None

    class Config:
        json_encoders = {
            datetime: lambda dt: dt.isoformat(),
        }
class TimezoneTracker:
    def __init__(self, db_config: Database, cache_file: str = 'timezone_cache.json'):
        self.db_config = db_config
        self.cache_file = cache_file
        self.last_timezone: str = "America/Los_Angeles"
        self.last_update: Optional[datetime] = None
        self.last_location: Optional[Tuple[float, float]] = None

    async def find(self, lat: float, lon: float) -> str:
        query = """
        SELECT tzid
        FROM timezones
        WHERE ST_Contains(geom, ST_SetSRID(ST_MakePoint($1, $2), 4326))
        LIMIT 1;
        """
        async with await self.db_config.get_connection() as conn:
            result = await conn.fetchrow(query, lon, lat)
            return result['tzid'] if result else 'Unknown'

    async def refresh(self, location: Union[Location, Tuple[float, float]], force: bool = False) -> str:
        if isinstance(location, Location):
            lat, lon = location.latitude, location.longitude
        else:
            lat, lon = location
        current_time = datetime.now()
        if (force or
            not self.last_update or
            current_time - self.last_update > timedelta(hours=1) or
            self.last_location != (lat, lon)):
            new_timezone = await self.find(lat, lon)
            self.last_timezone = new_timezone
            self.last_update = current_time
            self.last_location = (lat, lon)
            await self.save_to_cache()
            return new_timezone
        return self.last_timezone

    async def save_to_cache(self):
        cache_data = {
            'last_timezone': self.last_timezone,
            'last_update': self.last_update.isoformat() if self.last_update else None,
            'last_location': self.last_location
        }
        with open(self.cache_file, 'w') as f:
            json.dump(cache_data, f)

    async def load_from_cache(self):
        try:
            with open(self.cache_file, 'r') as f:
                cache_data = json.load(f)
            self.last_timezone = cache_data.get('last_timezone')
            self.last_update = datetime.fromisoformat(cache_data['last_update']) if cache_data.get('last_update') else None
            self.last_location = tuple(cache_data['last_location']) if cache_data.get('last_location') else None
        except (FileNotFoundError, json.JSONDecodeError):
            # If file doesn't exist or is invalid, we'll start fresh
            pass

    async def get_current(self, location: Union[Location, Tuple[float, float]]) -> str:
        await self.load_from_cache()
        return await self.refresh(location)

    async def get_last(self) -> Optional[str]:
        await self.load_from_cache()
        return self.last_timezone
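A short usage sketch, assuming a PostGIS `timezones` table with `tzid` and `geom` columns as the query above expects; the coordinates are arbitrary example values:

```python
import asyncio
from sijapi.classes import Database, TimezoneTracker

async def main():
    tz_tracker = TimezoneTracker(Database.from_env())
    # the first call queries the timezones table; repeat calls within an hour
    # for the same coordinates are answered from the JSON cache file
    print(await tz_tracker.get_current((45.5152, -122.6784)))
    print(await tz_tracker.get_last())

asyncio.run(main())
```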


@@ -96,7 +96,7 @@ TRUSTED_SUBNETS=127.0.0.1/32,10.13.37.0/24,100.64.64.0/24
# ──────────
#
#─── router selection: ────────────────────────────────────────────────────────────
-ROUTERS=asr,calendar,cf,email,health,hooks,llm,locate,note,rag,sd,serve,summarize,time,tts,weather
+ROUTERS=asr,calendar,cf,email,health,hooks,llm,locate,note,rag,sd,serve,time,tts,weather
UNLOADED=ig
#─── notes: ──────────────────────────────────────────────────────────────────────
#
@@ -218,18 +218,18 @@ TAILSCALE_API_KEY=¿SECRET? # <--- enter your own TS API key
# ░░ ░ ░T̷ O̷ G̷ E̷ T̷ H̷ ░ R̷. ░ ░ ░ ░ ░
#
#─── frag, or weat,and locate modules:── .
-DB=db
+DB_NAME=db
#
DB_HOST=127.0.0.1
DB_PORT=5432
# R E A L T I G H T.
DB_USER=postgres
-DB_PASS=¿SECRET? # <--- enter your own Postgres password'
+DB_PASSWORD=¿SECRET? # <--- enter your own Postgres password'
# Y E A H . . .
DB_SSH=100.64.64.15
# . . . 𝙹 𝚄 𝚂 𝚃 𝙻 𝙸 𝙺 𝙴 𝚃 𝙷 𝙰 𝚃.
DB_SSH_USER=sij
DB_SSH_PASS=¿SECRET? # <--- enter SSH password for pg server (if not localhost)
#─── notes: ────────────────────────────────────────────────── S E E ? 𝕰 𝖅 - 𝕻 𝖅
#
# DB, DB_HOST, DB_PORT, DB_USER, and DB_PASS should specify those respective
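The renamed variables (`DB_NAME`, `DB_PASSWORD`) line up with what `Database.from_env()` in `sijapi/classes.py` reads. A quick sanity check, assuming the `.env` file has been loaded with python-dotenv:

```python
from dotenv import load_dotenv
from sijapi.classes import Database

load_dotenv()  # expects DB_NAME, DB_HOST, DB_PORT, DB_USER, DB_PASSWORD (and optionally DB_SCHEMA)
db = Database.from_env()
print(db.to_dict())  # includes the password, so treat the output as sensitive
```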


@@ -0,0 +1,70 @@
accounts:
  - name: REDACT@email.com
    fullname: Your full name
    bio: 'an ai enthusiast'
    imap:
      username: REDACT@email.com
      password: REDACT
      host: '127.0.0.1'
      port: 1142
      encryption: STARTTLS
    smtp:
      username: REDACT@email.com
      password: REDACT
      host: '127.0.0.1'
      port: 1024
      encryption: SSL
    autoresponders:
      - name: work
        style: professional
        context: he is currently on leave and will return in late July
        whitelist:
          - '@work.org'
        blacklist:
          - 'spam@'
          - unsubscribe
          - 'no-reply@'
      - name: ai
        style: cryptic
        context: respond to any inquiries with cryptic and vaguely menacing riddles, esoteric assertions, or obscure references.
        image_prompt: using visually evocative words, phrases, and sentence fragments, describe an image inspired by the following prompt
        whitelist:
          - 'colleagues@work.org'
          - 'jimbo@'
          - 'internal work email:'
        blacklist:
          - personal
          - private
          - noneofyerdamnbusiness
          - unsubscribe
          - 'no-reply@'
  - name: otherREDACT@email.com
    fullname: sij.ai
    bio: an AI bot that responds in riddles.
    imap:
      username: otherREDACT@email.com
      password: REDACT
      host: '127.0.0.1'
      port: 1142
      encryption: STARTTLS
    smtp:
      username: otherREDACT@email.com
      password: REDACT
      host: '127.0.0.1'
      port: 1024
      encryption: SSL
    autoresponders:
      - name: ai
        style: cryptic
        context: respond to any inquiries with cryptic and vaguely menacing riddles, esoteric assertions, or obscure references.
        image_prompt: using visually evocative words, phrases, and sentence fragments, describe an image inspired by the following prompt
        whitelist:
          - 'bestfriend@gmail.com'
          - 'eximstalking@'
          - uniquephraseinsubjectorbody
          - 'internal work email:'
        blacklist:
          - work
          - '@work.org'
          - unsubscribe
          - 'no-reply@'
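For reference, this file is parsed by `load_email_accounts()` in `routers/email.py`, which builds one `EmailAccount` per entry under `accounts:`. A minimal sketch of that round trip; note the new `EmailAccount.refresh` field has no default, so each account entry would also need a `refresh:` value (in seconds) for validation to succeed:

```python
import yaml
from sijapi.classes import EmailAccount

def load_email_accounts(yaml_path: str):
    # same logic as routers/email.py
    with open(yaml_path, 'r') as file:
        config = yaml.safe_load(file)
    return [EmailAccount(**account) for account in config['accounts']]

accounts = load_email_accounts("email.yaml")  # path is illustrative
for account in accounts:
    print(account.name, [ar.name for ar in (account.autoresponders or [])])
```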


@@ -23,7 +23,6 @@ import multiprocessing
import asyncio
import subprocess
import tempfile
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR, MAX_CPU_CORES


@@ -10,29 +10,38 @@ from pathlib import Path
from shutil import move
import tempfile
import re
-import ssl
-from smtplib import SMTP_SSL
+from smtplib import SMTP_SSL, SMTP
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
+import ssl
from datetime import datetime as dt_datetime
from pydantic import BaseModel
from typing import List, Optional, Any
import yaml
from typing import List, Dict, Optional
from pydantic import BaseModel
+from sijapi import DEBUG, ERR, LLM_SYS_MSG
+from datetime import datetime as dt_datetime
+from typing import Dict
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
-from sijapi import PODCAST_DIR, DEFAULT_VOICE, TZ, EMAIL_ACCOUNTS, EmailAccount, IMAPConfig, SMTPConfig
+from sijapi import PODCAST_DIR, DEFAULT_VOICE, EMAIL_CONFIG
-from sijapi.routers import summarize, tts, llm, sd
+from sijapi.routers import tts, llm, sd, locate
-from sijapi.utilities import clean_text, assemble_journal_path, localize_datetime, extract_text, prefix_lines
+from sijapi.utilities import clean_text, assemble_journal_path, extract_text, prefix_lines
-from sijapi.classes import EmailAccount, IncomingEmail, EmailContact
+from sijapi.classes import EmailAccount, IMAPConfig, SMTPConfig, IncomingEmail, EmailContact

email = APIRouter(tags=["private"])

+def load_email_accounts(yaml_path: str) -> List[EmailAccount]:
+    with open(yaml_path, 'r') as file:
+        config = yaml.safe_load(file)
+    return [EmailAccount(**account) for account in config['accounts']]

def get_account_by_email(email: str) -> Optional[EmailAccount]:
-    for account in EMAIL_ACCOUNTS:
+    email_accounts = load_email_accounts(EMAIL_CONFIG)
+    for account in email_accounts:
        if account.imap.username.lower() == email.lower():
            return account
    return None
@@ -54,6 +63,18 @@ def get_imap_connection(account: EmailAccount):
                      ssl=account.imap.encryption == 'SSL',
                      starttls=account.imap.encryption == 'STARTTLS')

+def get_smtp_connection(account: EmailAccount):
+    context = ssl._create_unverified_context()
+    if account.smtp.encryption == 'SSL':
+        return SMTP_SSL(account.smtp.host, account.smtp.port, context=context)
+    elif account.smtp.encryption == 'STARTTLS':
+        smtp = SMTP(account.smtp.host, account.smtp.port)
+        smtp.starttls(context=context)
+        return smtp
+    else:
+        return SMTP(account.smtp.host, account.smtp.port)

def get_matching_autoresponders(email: IncomingEmail, account: EmailAccount) -> List[Dict]:
    matching_profiles = []
@@ -72,7 +93,7 @@ def get_matching_autoresponders(email: IncomingEmail, account: EmailAccount) ->
                'USER_FULLNAME': account.fullname,
                'RESPONSE_STYLE': profile.style,
                'AUTORESPONSE_CONTEXT': profile.context,
-                'IMG_GEN_PROMPT': profile.img_gen_prompt,
+                'IMG_GEN_PROMPT': profile.image_prompt,
                'USER_BIO': account.bio
            })
@@ -80,21 +101,44 @@ def get_matching_autoresponders(email: IncomingEmail, account: EmailAccount) ->

async def generate_auto_response_body(e: IncomingEmail, profile: Dict) -> str:
-    age = dt_datetime.now(TZ) - e.datetime_received
-    prompt = f'''
-    Please generate a personalized auto-response to the following email. The email is from {e.sender} and was sent {age} ago with the subject line "{e.subject}." You are auto-responding on behalf of {profile['USER_FULLNAME']}, who is described by the following short bio (strictly for your context -- do not recite this in the response): "{profile['USER_BIO']}." {profile['USER_FULLNAME']} is unable to respond personally, because {profile['AUTORESPONSE_CONTEXT']}. Everything from here to ~~//END//~~ is the email body.
+    now = await locate.localize_datetime(dt_datetime.now())
+    then = await locate.localize_datetime(e.datetime_received)
+    age = now - then
+    usr_prompt = f'''
+    Generate a personalized auto-response to the following email:
+    From: {e.sender}
+    Sent: {age} ago
+    Subject: "{e.subject}"
+    Body:
    {e.body}
-    ~~//END//~~
-    Keep your auto-response {profile['RESPONSE_STYLE']} and to the point, but do aim to make it responsive specifically to the sender's inquiry.
+    Respond on behalf of {profile['USER_FULLNAME']}, who is unable to respond personally because {profile['AUTORESPONSE_CONTEXT']}.
+    Keep the response {profile['RESPONSE_STYLE']} and to the point, but responsive to the sender's inquiry.
+    Do not mention or recite this context information in your response.
    '''
+    sys_prompt = f"You are an AI assistant helping {profile['USER_FULLNAME']} with email responses. {profile['USER_FULLNAME']} is described as: {profile['USER_BIO']}"

    try:
-        response = await llm.query_ollama(prompt, 400)
-        return response
+        response = await llm.query_ollama(usr_prompt, sys_prompt, 400)
+        DEBUG(f"query_ollama response: {response}")
+        if isinstance(response, str):
+            return response
+        elif isinstance(response, dict):
+            if "message" in response and "content" in response["message"]:
+                return response["message"]["content"]
+            else:
+                ERR(f"Unexpected response structure from query_ollama: {response}")
+        else:
+            ERR(f"Unexpected response type from query_ollama: {type(response)}")
+        # If we reach here, we couldn't extract a valid response
+        raise ValueError("Could not extract valid response from query_ollama")
    except Exception as e:
        ERR(f"Error generating auto-response: {str(e)}")
-        return "Thank you for your email. Unfortunately, an error occurred while generating the auto-response. We apologize for any inconvenience."
+        return f"Thank you for your email regarding '{e.subject}'. We are currently experiencing technical difficulties with our auto-response system. We will review your email and respond as soon as possible. We apologize for any inconvenience."

def clean_email_content(html_content):
@@ -123,115 +167,113 @@ async def extract_attachments(attachments) -> List[str]:
    return attachment_texts
-async def process_unread_emails(summarize_emails: bool = True, podcast: bool = True):
-    while True:
-        for account in EMAIL_ACCOUNTS:
-            DEBUG(f"Connecting to {account.name} to check for unread emails...")
-            try:
-                with get_imap_connection(account) as inbox:
-                    DEBUG(f"Connected to {account.name}, checking for unread emails now...")
-                    unread_messages = inbox.messages(unread=True)
-                    for uid, message in unread_messages:
-                        recipients = [EmailContact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to]
-                        this_email = IncomingEmail(
-                            sender=message.sent_from[0]['email'],
-                            datetime_received=localize_datetime(message.date),
-                            recipients=recipients,
-                            subject=message.subject,
-                            body=clean_email_content(message.body['html'][0]) if message.body['html'] else clean_email_content(message.body['plain'][0]) or "",
-                            attachments=message.attachments
-                        )
-                        DEBUG(f"\n\nProcessing email for account {account.name}: {this_email.subject}\n\n")
-                        md_path, md_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".md")
-                        tts_path, tts_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".wav")
-                        if summarize_emails:
-                            email_content = f'At {this_email.datetime_received}, {this_email.sender} sent an email with the subject line "{this_email.subject}". The email in its entirety reads: \n\n{this_email.body}\n"'
-                            if this_email.attachments:
-                                attachment_texts = await extract_attachments(this_email.attachments)
-                                email_content += "\n—--\n" + "\n—--\n".join([f"Attachment: {text}" for text in attachment_texts])
-                            summary = await summarize.summarize_text(email_content)
-                            await tts.local_tts(text_content = summary, speed = 1.1, voice = DEFAULT_VOICE, podcast = podcast, output_path = tts_path)
-                            if podcast:
-                                if PODCAST_DIR.exists():
-                                    tts.copy_to_podcast_dir(tts_path)
-                                else:
-                                    ERR(f"PODCAST_DIR does not exist: {PODCAST_DIR}")
-                            save_email_as_markdown(this_email, summary, md_path, tts_relative)
-                            DEBUG(f"Email '{this_email.subject}' saved to {md_relative}.")
-                        else:
-                            save_email_as_markdown(this_email, None, md_path, None)
-                        matching_profiles = get_matching_autoresponders(this_email, account)
-                        for profile in matching_profiles:
-                            DEBUG(f"Auto-responding to {this_email.subject} with profile: {profile['USER_FULLNAME']}")
-                            auto_response_subject = f"Auto-Response Re: {this_email.subject}"
-                            auto_response_body = await generate_auto_response_body(this_email, profile)
-                            DEBUG(f"Auto-response: {auto_response_body}")
-                            await send_auto_response(this_email.sender, auto_response_subject, auto_response_body, profile, account)
-                        inbox.mark_seen(uid)
-                await asyncio.sleep(30)
-            except Exception as e:
-                ERR(f"An error occurred for account {account.name}: {e}")
-                await asyncio.sleep(30)
+async def process_account(account: EmailAccount):
+    while True:
+        start_time = dt_datetime.now()
+        try:
+            DEBUG(f"Connecting to {account.name} to check for unread emails...")
+            with get_imap_connection(account) as inbox:
+                DEBUG(f"Connected to {account.name}, checking for unread emails now...")
+                unread_messages = inbox.messages(unread=True)
+                for uid, message in unread_messages:
+                    recipients = [EmailContact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to]
+                    localized_datetime = await locate.localize_datetime(message.date)
+                    this_email = IncomingEmail(
+                        sender=message.sent_from[0]['email'],
+                        datetime_received=localized_datetime,
+                        recipients=recipients,
+                        subject=message.subject,
+                        body=clean_email_content(message.body['html'][0]) if message.body['html'] else clean_email_content(message.body['plain'][0]) or "",
+                        attachments=message.attachments
+                    )
+                    DEBUG(f"\n\nProcessing email for account {account.name}: {this_email.subject}\n\n")
+                    save_success = await save_email(this_email, account)
+                    respond_success = await autorespond(this_email, account)
+                    if save_success and respond_success:
+                        inbox.mark_seen(uid)
+        except Exception as e:
+            ERR(f"An error occurred for account {account.name}: {e}")
+        # Calculate the time taken for processing
+        processing_time = (dt_datetime.now() - start_time).total_seconds()
+        # Calculate the remaining time to wait
+        wait_time = max(0, account.refresh - processing_time)
+        # Wait for the remaining time
+        await asyncio.sleep(wait_time)

+async def process_all_accounts():
+    email_accounts = load_email_accounts(EMAIL_CONFIG)
+    tasks = [asyncio.create_task(process_account(account)) for account in email_accounts]
+    await asyncio.gather(*tasks)
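The timing logic above aims to start each pass roughly every `account.refresh` seconds regardless of how long the mailbox check took; a tiny worked example with assumed numbers:

```python
# assumed values, purely illustrative
refresh = 30            # EmailAccount.refresh, in seconds
processing_time = 4.2   # seconds spent checking and handling mail this pass

wait_time = max(0, refresh - processing_time)
print(wait_time)  # ~25.8s of sleep, so passes start ~30s apart; 0 if a pass overruns
```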
-def save_email_as_markdown(email: IncomingEmail, summary: str, md_path: Path, tts_path: Path):
-    '''
-    Saves an email as a markdown file in the specified directory.
-    Args:
-        email (IncomingEmail): The email object containing email details.
-        summary (str): The summary of the email.
-        tts_path (str): The path to the text-to-speech audio file.
-    '''
-    DEBUG(f"Saving email to {md_path}...")
-    # Sanitize filename to avoid issues with filesystems
-    filename = f"{email.datetime_received.strftime('%Y%m%d%H%M%S')}_{email.subject.replace('/', '-')}.md".replace(':', '-').replace(' ', '_')
-    summary = prefix_lines(summary, '> ')
-    # Create the markdown content
-    markdown_content = f'''---
-date: {email.datetime_received.strftime('%Y-%m-%d')}
-tags:
- - email
----
-| | | |
-| --: | :--: | :--: |
-| *received* | **{email.datetime_received.strftime('%B %d, %Y at %H:%M:%S %Z')}** | |
-| *from* | **[[{email.sender}]]** | |
-| *to* | {', '.join([f'**[[{recipient}]]**' for recipient in email.recipients])} | |
-| *subject* | **{email.subject}** | |
-'''
-    if summary:
-        markdown_content += f'''
-> [!summary] Summary
-> {summary}
-'''
-    if tts_path:
-        markdown_content += f'''
-![[{tts_path}]]
-'''
-    markdown_content += f'''
----
-{email.body}
-'''
-    with open(md_path, 'w', encoding='utf-8') as md_file:
-        md_file.write(markdown_content)
-    DEBUG(f"Saved markdown to {md_path}")
+async def save_email(this_email: IncomingEmail, account: EmailAccount):
+    try:
+        md_path, md_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".md")
+        tts_path, tts_relative = assemble_journal_path(this_email.datetime_received, "Emails", this_email.subject, ".wav")
+        summary = ""
+        if account.summarize == True:
+            email_content = f'At {this_email.datetime_received}, {this_email.sender} sent an email with the subject line "{this_email.subject}". The email in its entirety reads: \n\n{this_email.body}\n"'
+            if this_email.attachments:
+                attachment_texts = await extract_attachments(this_email.attachments)
+                email_content += "\n—--\n" + "\n—--\n".join([f"Attachment: {text}" for text in attachment_texts])
+            summary = await llm.summarize_text(email_content)
+            await tts.local_tts(text_content = summary, speed = 1.1, voice = DEFAULT_VOICE, podcast = account.podcast, output_path = tts_path)
+            summary = prefix_lines(summary, '> ')
+        # Create the markdown content
+        markdown_content = f'''---
+date: {email.datetime_received.strftime('%Y-%m-%d')}
+tags:
+ - email
+---
+| | | |
+| --: | :--: | :--: |
+| *received* | **{email.datetime_received.strftime('%B %d, %Y at %H:%M:%S %Z')}** | |
+| *from* | **[[{email.sender}]]** | |
+| *to* | {', '.join([f'**[[{recipient}]]**' for recipient in email.recipients])} | |
+| *subject* | **{email.subject}** | |
+'''
+        if summary:
+            markdown_content += f'''
+> [!summary] Summary
+> {summary}
+'''
+        if tts_path.exists():
+            markdown_content += f'''
+![[{tts_path}]]
+'''
+        markdown_content += f'''
+---
+{email.body}
+'''
+        with open(md_path, 'w', encoding='utf-8') as md_file:
+            md_file.write(markdown_content)
+        DEBUG(f"Saved markdown to {md_path}")
+        return True
+    except Exception as e:
+        ERR(f"Exception: {e}")
+        return False

+async def autorespond(this_email: IncomingEmail, account: EmailAccount):
+    matching_profiles = get_matching_autoresponders(this_email, account)
+    for profile in matching_profiles:
+        DEBUG(f"Auto-responding to {this_email.subject} with profile: {profile['USER_FULLNAME']}")
+        auto_response_subject = f"Auto-Response Re: {this_email.subject}"
+        auto_response_body = await generate_auto_response_body(this_email, profile)
+        DEBUG(f"Auto-response: {auto_response_body}")
+        await send_auto_response(this_email.sender, auto_response_subject, auto_response_body, profile, account)
async def send_auto_response(to_email, subject, body, profile, account):
    DEBUG(f"Sending auto response to {to_email}...")
@@ -243,35 +285,24 @@ async def send_auto_response(to_email, subject, body, profile, account):
        message.attach(MIMEText(body, 'plain'))

        if profile['IMG_GEN_PROMPT']:
-            jpg_path = sd.workflow(profile['IMG_GEN_PROMPT'], earlyout=False, downscale_to_fit=True)
+            jpg_path = await sd.workflow(profile['IMG_GEN_PROMPT'], earlyout=False, downscale_to_fit=True)
            if jpg_path and os.path.exists(jpg_path):
                with open(jpg_path, 'rb') as img_file:
                    img = MIMEImage(img_file.read(), name=os.path.basename(jpg_path))
                    message.attach(img)

-        context = ssl._create_unverified_context()
-        with SMTP_SSL(account.smtp.host, account.smtp.port, context=context) as server:
+        with get_smtp_connection(account) as server:
            server.login(account.smtp.username, account.smtp.password)
            server.send_message(message)

        INFO(f"Auto-response sent to {to_email} concerning {subject} from account {account.name}")
+        return True

    except Exception as e:
        ERR(f"Error in preparing/sending auto-response from account {account.name}: {e}")
-        raise e
+        return False

@email.on_event("startup")
async def startup_event():
-    asyncio.create_task(process_unread_emails())
+    asyncio.create_task(process_all_accounts())
-####


@@ -1,10 +1,9 @@
#routers/llm.py
-from fastapi import APIRouter, HTTPException, Request, Response
+from fastapi import APIRouter, HTTPException, Request, Response, BackgroundTasks, File, Form, UploadFile
-from fastapi.responses import StreamingResponse, JSONResponse
+from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
-from starlette.responses import StreamingResponse
from datetime import datetime as dt_datetime
from dateutil import parser
-from typing import List, Dict, Any, Union
+from typing import List, Dict, Any, Union, Optional
from pydantic import BaseModel, root_validator, ValidationError
import aiofiles
import os
@@ -17,21 +16,20 @@ import base64
from pathlib import Path
import ollama
from ollama import AsyncClient as Ollama, list as OllamaList
-import aiofiles
import time
import asyncio
-from pathlib import Path
-from fastapi import FastAPI, Request, HTTPException, APIRouter
-from fastapi.responses import JSONResponse, StreamingResponse
-from dotenv import load_dotenv
+import tempfile
+import shutil
+import html2text
+import markdown
-from sijapi import BASE_DIR, DATA_DIR, LOGS_DIR, CONFIG_DIR, LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY
-from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
-from sijapi.utilities import convert_to_unix_time, sanitize_filename
+from sijapi import LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY, DEBUG, INFO, WARN, ERR, CRITICAL, DEFAULT_VOICE, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
+from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension
+from sijapi.routers.tts import generate_speech
+from sijapi.routers.asr import transcribe_audio

llm = APIRouter()

# Initialize chromadb client
client = chromadb.Client()
OBSIDIAN_CHROMADB_COLLECTION = client.create_collection("obsidian")
@@ -80,11 +78,11 @@ async def generate_response(prompt: str):
    return {"response": output['response']}

-async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, max_tokens: int = 200):
+async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = DEFAULT_LLM, max_tokens: int = 200):
    messages = [{"role": "system", "content": sys},
                {"role": "user", "content": usr}]
    LLM = Ollama()
-    response = await LLM.chat(model=DEFAULT_LLM, messages=messages, options={"num_predict": max_tokens})
+    response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
    DEBUG(response)

    if "message" in response:
@@ -482,3 +480,186 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
        try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
        return try_again
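With the widened `query_ollama` signature above (`usr`, `sys`, `model`, `max_tokens`), a call might look like the following sketch; the prompt strings are placeholders and a locally available default Ollama model is assumed:

```python
import asyncio
from sijapi.routers import llm

async def demo():
    reply = await llm.query_ollama(
        usr="Draft a one-sentence acknowledgement of this email.",
        sys="You are a terse assistant.",
        max_tokens=100,  # passed through as Ollama's num_predict option
    )
    print(reply)

asyncio.run(demo())
```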
@llm.get("/summarize")
async def summarize_get(text: str = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)):
summarized_text = await summarize_text(text, instruction)
return summarized_text
@llm.post("/summarize")
async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)):
text_content = text if text else await extract_text(file)
summarized_text = await summarize_text(text_content, instruction)
return summarized_text
@llm.post("/speaksummary")
async def summarize_tts_endpoint(background_tasks: BackgroundTasks, instruction: str = Form(SUMMARY_INSTRUCT), file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), voice: Optional[str] = Form(DEFAULT_VOICE), speed: Optional[float] = Form(1.2), podcast: Union[bool, str] = Form(False)):
podcast = str_to_bool(str(podcast)) # Proper boolean conversion
text_content = text if text else extract_text(file)
final_output_path = await summarize_tts(text_content, instruction, voice, speed, podcast)
return FileResponse(path=final_output_path, filename=os.path.basename(final_output_path), media_type='audio/wav')
async def summarize_tts(
text: str,
instruction: str = SUMMARY_INSTRUCT,
voice: Optional[str] = DEFAULT_VOICE,
speed: float = 1.1,
podcast: bool = False,
LLM: Ollama = None
):
LLM = LLM if LLM else Ollama()
summarized_text = await summarize_text(text, instruction, LLM=LLM)
filename = await summarize_text(summarized_text, "Provide a title for this summary no longer than 4 words")
filename = sanitize_filename(filename)
filename = ' '.join(filename.split()[:5])
timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"{timestamp}{filename}.wav"
background_tasks = BackgroundTasks()
final_output_path = await generate_speech(background_tasks, summarized_text, voice, "xtts", speed=speed, podcast=podcast, title=filename)
DEBUG(f"summary_tts completed with final_output_path: {final_output_path}")
return final_output_path
async def get_title(text: str, LLM: Ollama() = None):
LLM = LLM if LLM else Ollama()
title = await process_chunk("Generate a title for this text", text, 1, 1, 12, LLM)
title = sanitize_filename(title)
return title
def split_text_into_chunks(text: str) -> List[str]:
"""
Splits the given text into manageable chunks based on predefined size and overlap.
"""
words = text.split()
adjusted_chunk_size = max(1, int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW)) # Ensure at least 1
adjusted_overlap = max(0, int(SUMMARY_CHUNK_OVERLAP / SUMMARY_TPW)) # Ensure non-negative
chunks = []
for i in range(0, len(words), adjusted_chunk_size - adjusted_overlap):
DEBUG(f"We are on iteration # {i} if split_text_into_chunks.")
chunk = ' '.join(words[i:i + adjusted_chunk_size])
chunks.append(chunk)
return chunks
def calculate_max_tokens(text: str) -> int:
tokens_count = max(1, int(len(text.split()) * SUMMARY_TPW)) # Ensure at least 1
return min(tokens_count // 4, SUMMARY_CHUNK_SIZE)
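To make the chunking arithmetic concrete, a small worked example; the tokens-per-word ratio and overlap below are assumed illustration values, not the ones configured in this repo:

```python
SUMMARY_CHUNK_SIZE = 4000    # tokens per chunk (default from sijapi/__init__.py)
SUMMARY_TPW = 1.3            # assumed tokens-per-word ratio
SUMMARY_CHUNK_OVERLAP = 100  # assumed token overlap between chunks

adjusted_chunk_size = max(1, int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW))  # ~3076 words per chunk
adjusted_overlap = max(0, int(SUMMARY_CHUNK_OVERLAP / SUMMARY_TPW))  # ~76 words of overlap
step = adjusted_chunk_size - adjusted_overlap                        # window advances ~3000 words
print(adjusted_chunk_size, adjusted_overlap, step)
```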
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], background_tasks: BackgroundTasks = None) -> str:
if isinstance(file, UploadFile):
file_extension = get_extension(file)
temp_file_path = tempfile.mktemp(suffix=file_extension)
with open(temp_file_path, 'wb') as buffer:
shutil.copyfileobj(file.file, buffer)
file_path = temp_file_path
elif isinstance(file, (bytes, bytearray)):
temp_file_path = tempfile.mktemp()
with open(temp_file_path, 'wb') as buffer:
buffer.write(file)
file_path = temp_file_path
elif isinstance(file, (str, Path)):
file_path = str(file)
else:
raise ValueError("Unsupported file type")
_, file_ext = os.path.splitext(file_path)
file_ext = file_ext.lower()
text_content = ""
if file_ext == '.pdf':
text_content = await extract_text_from_pdf(file_path)
elif file_ext in ['.wav', '.m4a', '.m4v', '.mp3', '.mp4']:
text_content = await transcribe_audio(file_path=file_path)
elif file_ext == '.md':
text_content = await read_text_file(file_path)
text_content = markdown.markdown(text_content)
elif file_ext == '.html':
text_content = await read_text_file(file_path)
text_content = html2text.html2text(text_content)
elif file_ext in ['.txt', '.csv', '.json']:
text_content = await read_text_file(file_path)
elif file_ext == '.docx':
text_content = await extract_text_from_docx(file_path)
if background_tasks and 'temp_file_path' in locals():
background_tasks.add_task(os.remove, temp_file_path)
elif 'temp_file_path' in locals():
os.remove(temp_file_path)
return text_content
async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_override: int = None, length_quotient: float = SUMMARY_LENGTH_RATIO, LLM: Ollama = None):
"""
Process the given text: split into chunks, summarize each chunk, and
potentially summarize the concatenated summary for long texts.
"""
LLM = LLM if LLM else Ollama()
chunked_text = split_text_into_chunks(text)
total_parts = max(1, len(chunked_text)) # Ensure at least 1
total_words_count = len(text.split())
total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW)) # Ensure at least 1
total_summary_length = length_override if length_override else total_tokens_count // length_quotient
corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
individual_summary_length = max(1, corrected_total_summary_length // total_parts) # Ensure at least 1
DEBUG(f"Text split into {total_parts} chunks.")
summaries = await asyncio.gather(*[
process_chunk(instruction, chunk, i+1, total_parts, individual_summary_length, LLM) for i, chunk in enumerate(chunked_text)
])
concatenated_summary = ' '.join(summaries)
if total_parts > 1:
concatenated_summary = await process_chunk(instruction, concatenated_summary, 1, 1)
return concatenated_summary
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, max_tokens: Optional[int] = None, LLM: Ollama = None) -> str:
"""
Process a portion of text using the ollama library asynchronously.
"""
LLM = LLM if LLM else Ollama()
words_count = max(1, len(text.split())) # Ensure at least 1
tokens_count = max(1, int(words_count * SUMMARY_TPW)) # Ensure at least 1
fraction_tokens = max(1, tokens_count // SUMMARY_LENGTH_RATIO) # Ensure at least 1
if max_tokens is None:
max_tokens = min(fraction_tokens, SUMMARY_CHUNK_SIZE // max(1, total_parts)) # Ensure at least 1
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH) # Ensure a minimum token count to avoid tiny processing chunks
DEBUG(f"Summarizing part {part} of {total_parts}: Max_tokens: {max_tokens}")
if part and total_parts > 1:
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
else:
prompt = f"{instruction}:\n\n{text}"
DEBUG(f"Starting LLM.generate for part {part} of {total_parts}")
response = await LLM.generate(
model=SUMMARY_MODEL,
prompt=prompt,
stream=False,
options={'num_predict': max_tokens, 'temperature': 0.6}
)
text_response = response['response']
DEBUG(f"Completed LLM.generate for part {part} of {total_parts}")
return text_response
async def title_and_summary(extracted_text: str):
title = await get_title(extracted_text)
processed_title = title.split("\n")[-1]
processed_title = processed_title.split("\r")[-1]
processed_title = sanitize_filename(processed_title)
summary = await summarize_text(extracted_text)
return processed_title, summary


@@ -17,13 +17,12 @@ from requests.adapters import HTTPAdapter
import re
import os
from datetime import timedelta, datetime, time as dt_time, date as dt_date
-from sijapi.utilities import localize_datetime
from fastapi import HTTPException, status
from pathlib import Path
from fastapi import APIRouter, Query, HTTPException
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, INFO
from sijapi import YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, BASE_URL, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, TZ
-from sijapi.routers import tts, time, sd, locate, weather, asr, calendar, summarize
+from sijapi.routers import tts, llm, time, sd, locate, weather, asr, calendar
from sijapi.routers.locate import Location
from sijapi.utilities import assemble_journal_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, HOURLY_COLUMNS_MAPPING
@@ -39,7 +38,7 @@ async def build_daily_note_range_endpoint(dt_start: str, dt_end: str):
    results = []
    current_date = start_date
    while current_date <= end_date:
-        formatted_date = localize_datetime(current_date)
+        formatted_date = await locate.localize_datetime(current_date)
        result = await build_daily_note(formatted_date)
        results.append(result)
        current_date += timedelta(days=1)
@@ -58,7 +57,7 @@ Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses th
    header = f"# [[{day_before}|← ]] {formatted_day} [[{day_after}| →]]\n\n"
    places = await locate.fetch_locations(date_time)
-    location = locate.reverse_geocode(places[0].latitude, places[0].longitude)
+    location = await locate.reverse_geocode(places[0].latitude, places[0].longitude)
    timeslips = await build_daily_timeslips(date_time)
@@ -271,9 +270,9 @@ async def process_document(
        with open(file_path, 'wb') as f:
            f.write(document_content)
-        parsed_content = await summarize.extract_text(file_path) # Ensure extract_text is awaited
+        parsed_content = await llm.extract_text(file_path) # Ensure extract_text is awaited
-        llm_title, summary = await summarize.title_and_summary(parsed_content)
+        llm_title, summary = await llm.title_and_summary(parsed_content)
        try:
            readable_title = sanitize_filename(title if title else document.filename)
@@ -342,7 +341,7 @@ async def process_article(
    timestamp = datetime.now().strftime('%b %d, %Y at %H:%M')
-    parsed_content = parse_article(url, source)
+    parsed_content = await parse_article(url, source)
    if parsed_content is None:
        return {"error": "Failed to retrieve content"}
@@ -350,7 +349,7 @@ async def process_article(
    markdown_filename, relative_path = assemble_journal_path(datetime.now(), subdir="Articles", filename=readable_title, extension=".md")
    try:
-        summary = await summarize.summarize_text(parsed_content["content"], "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
+        summary = await llm.summarize_text(parsed_content["content"], "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
        summary = summary.replace('\n', ' ') # Remove line breaks
        if tts_mode == "full" or tts_mode == "content":
@@ -427,7 +426,7 @@ tags:
        raise HTTPException(status_code=500, detail=str(e))
-def parse_article(url: str, source: Optional[str] = None):
+async def parse_article(url: str, source: Optional[str] = None):
    source = source if source else trafilatura.fetch_url(url)
    traf = trafilatura.extract_metadata(filecontent=source, default_url=url)
@@ -442,7 +441,12 @@ def parse_article(url: str, source: Optional[str] = None):
    title = np3k.title or traf.title
    authors = np3k.authors or traf.author
    authors = authors if isinstance(authors, List) else [authors]
-    date = np3k.publish_date or localize_datetime(traf.date)
+    date = np3k.publish_date or traf.date
+    try:
+        date = await locate.localize_datetime(date)
+    except:
+        DEBUG(f"Failed to localize {date}")
+        date = await locate.localize_datetime(datetime.now())
    excerpt = np3k.meta_description or traf.description
    content = trafilatura.extract(source, output_format="markdown", include_comments=False) or np3k.text
    image = np3k.top_image or traf.image
@@ -474,7 +478,7 @@ async def process_archive(
    timestamp = datetime.now().strftime('%b %d, %Y at %H:%M')
-    parsed_content = parse_article(url, source)
+    parsed_content = await parse_article(url, source)
    if parsed_content is None:
        return {"error": "Failed to retrieve content"}
    content = parsed_content["content"]
@@ -635,7 +639,7 @@ async def banner_endpoint(dt: str, location: str = None, mood: str = None, other
    Endpoint (POST) that generates a new banner image for the Obsidian daily note for a specified date, taking into account optional additional information, then updates the frontmatter if necessary.
    '''
    DEBUG(f"banner_endpoint requested with date: {dt} ({type(dt)})")
-    date_time = localize_datetime(dt)
+    date_time = await locate.localize_datetime(dt)
    DEBUG(f"date_time after localization: {date_time} ({type(date_time)})")
    jpg_path = await generate_banner(date_time, location, mood=mood, other_context=other_context)
    return jpg_path
@@ -643,7 +647,7 @@ async def banner_endpoint(dt: str, location: str = None, mood: str = None, other
async def generate_banner(dt, location: Location = None, forecast: str = None, mood: str = None, other_context: str = None):
    DEBUG(f"Location: {location}, forecast: {forecast}, mood: {mood}, other_context: {other_context}")
-    date_time = localize_datetime(dt)
+    date_time = await locate.localize_datetime(dt)
    DEBUG(f"generate_banner called with date_time: {date_time}")
    destination_path, local_path = assemble_journal_path(date_time, filename="Banner", extension=".jpg", no_timestamp = True)
    DEBUG(f"destination path generated: {destination_path}")
@@ -699,7 +703,7 @@ async def note_weather_get(
):
    try:
-        date_time = datetime.now() if date == "0" else localize_datetime(date)
+        date_time = datetime.now() if date == "0" else locate.localize_datetime(date)
        DEBUG(f"date: {date} .. date_time: {date_time}")
        content = await update_dn_weather(date_time) #, lat, lon)
        return JSONResponse(content={"forecast": content}, status_code=200)
@@ -714,7 +718,7 @@ async def note_weather_get(
@note.post("/update/note/{date}")
async def post_update_daily_weather_and_calendar_and_timeslips(date: str) -> PlainTextResponse:
-    date_time = localize_datetime(date)
+    date_time = await locate.localize_datetime(date)
    await update_dn_weather(date_time)
    await update_daily_note_events(date_time)
    await build_daily_timeslips(date_time)
@@ -1091,7 +1095,7 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s
            # description = remove_characters(description)
            # description = remove_characters(description)
            if len(description) > 150:
-                description = await summarize.summarize_text(description, length_override=150)
+                description = await llm.summarize_text(description, length_override=150)
            event_markdown += f"\n * {description}"
        event_markdown += f"\n "
@@ -1117,7 +1121,7 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s
@note.get("/note/events", response_class=PlainTextResponse)
async def note_events_endpoint(date: str = Query(None)):
-    date_time = localize_datetime(date) if date else datetime.now(TZ)
+    date_time = await locate.localize_datetime(date) if date else datetime.now(TZ)
    response = await update_daily_note_events(date_time)
    return PlainTextResponse(content=response, status_code=200)


@@ -14,7 +14,8 @@ from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pathlib import Path
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
-from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path, localize_datetime
+from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
+from sijapi.routers.locate import localize_datetime
from sijapi import DATA_DIR, SD_IMAGE_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR

serve = APIRouter(tags=["public"])
@@ -50,7 +51,7 @@ def is_valid_date(date_str: str) -> bool:
@serve.get("/notes/{file_path:path}")
async def get_file(file_path: str):
    try:
-        date_time = localize_datetime(file_path);
+        date_time = await localize_datetime(file_path);
        absolute_path, local_path = assemble_journal_path(date_time, no_timestamp = True)
    except ValueError as e:
        DEBUG(f"Unable to parse {file_path} as a date, now trying to use it as a local path")


@@ -273,7 +273,17 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None)
        return select_voice(DEFAULT_VOICE)

-async def local_tts(text_content: str, speed: float, voice: str, voice_file = None, podcast: bool = False, background_tasks: BackgroundTasks = None, title: str = None, output_path: Optional[Path] = None) -> str:
+async def local_tts(
+    text_content: str,
+    speed: float,
+    voice: str,
+    voice_file = None,
+    podcast: bool = False,
+    background_tasks: BackgroundTasks = None,
+    title: str = None,
+    output_path: Optional[Path] = None
+) -> str:
    if output_path:
        file_path = Path(output_path)
    else:
@@ -286,27 +296,47 @@ async def local_tts(text_content: str, speed: float, voice: str, voice_file = No
    file_path.parent.mkdir(parents=True, exist_ok=True)

    voice_file_path = await get_voice_file_path(voice, voice_file)
-    XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
+    # Initialize TTS model in a separate thread
+    XTTS = await asyncio.to_thread(TTS, model_name=MODEL_NAME)
+    await asyncio.to_thread(XTTS.to, DEVICE)

    segments = split_text(text_content)
    combined_audio = AudioSegment.silent(duration=0)

    for i, segment in enumerate(segments):
        segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
        DEBUG(f"Segment file path: {segment_file_path}")
-        segment_file = await asyncio.to_thread(XTTS.tts_to_file, text=segment, speed=speed, file_path=str(segment_file_path), speaker_wav=[voice_file_path], language="en")
-        DEBUG(f"Segment file generated: {segment_file}")
-        combined_audio += AudioSegment.from_wav(str(segment_file))
-        # Delete the segment file immediately after adding it to the combined audio
-        segment_file_path.unlink()
+        # Run TTS in a separate thread
+        await asyncio.to_thread(
+            XTTS.tts_to_file,
+            text=segment,
+            speed=speed,
+            file_path=str(segment_file_path),
+            speaker_wav=[voice_file_path],
+            language="en"
+        )
+        DEBUG(f"Segment file generated: {segment_file_path}")
+        # Load and combine audio in a separate thread
+        segment_audio = await asyncio.to_thread(AudioSegment.from_wav, str(segment_file_path))
+        combined_audio += segment_audio
+        # Delete the segment file
+        await asyncio.to_thread(segment_file_path.unlink)

+    # Export the combined audio in a separate thread
    if podcast:
        podcast_file_path = PODCAST_DIR / file_path.name
-        combined_audio.export(podcast_file_path, format="wav")
+        await asyncio.to_thread(combined_audio.export, podcast_file_path, format="wav")
-    combined_audio.export(file_path, format="wav")
+    await asyncio.to_thread(combined_audio.export, file_path, format="wav")
    return str(file_path)
async def stream_tts(text_content: str, speed: float, voice: str, voice_file) -> StreamingResponse: async def stream_tts(text_content: str, speed: float, voice: str, voice_file) -> StreamingResponse:
voice_file_path = await get_voice_file_path(voice, voice_file) voice_file_path = await get_voice_file_path(voice, voice_file)
segments = split_text(text_content) segments = split_text(text_content)
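The change above moves every blocking XTTS and pydub call onto a worker thread via `asyncio.to_thread`, so the event loop stays responsive while audio renders. A minimal sketch of that pattern, with a placeholder `render_segment` standing in for the real synthesis call (not code from this commit), might look like:

```python
import asyncio
from pathlib import Path

def render_segment(text: str, out_path: Path) -> Path:
    """Placeholder for a blocking, CPU/GPU-bound synthesis call (e.g. XTTS.tts_to_file)."""
    out_path.write_bytes(b"RIFF....WAVE")  # stand-in for real audio rendering
    return out_path

async def synthesize_all(segments: list[str], out_dir: Path) -> list[Path]:
    out_dir.mkdir(parents=True, exist_ok=True)
    rendered: list[Path] = []
    for i, segment in enumerate(segments):
        # Off-load the blocking call so other coroutines (HTTP handlers, etc.) keep running.
        path = await asyncio.to_thread(render_segment, segment, out_dir / f"segment_{i}.wav")
        rendered.append(path)
    return rendered

if __name__ == "__main__":
    files = asyncio.run(synthesize_all(["hello", "world"], Path("/tmp/tts_segments")))
    print(files)
```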

View file

@@ -7,10 +7,9 @@ from typing import Dict
from datetime import datetime
from shapely.wkb import loads
from binascii import unhexlify
-from sijapi.utilities import localize_datetime
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
-from sijapi import VISUALCROSSING_API_KEY, TZ
-from sijapi.utilities import get_db_connection, haversine
+from sijapi import VISUALCROSSING_API_KEY, TZ, DB
+from sijapi.utilities import haversine
from sijapi.routers import locate

weather = APIRouter()
@@ -25,7 +24,7 @@ async def get_weather(date_time: datetime, latitude: float, longitude: float):
    try:
        DEBUG(f"Daily weather data from db: {daily_weather_data}")
        last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
-        last_updated = localize_datetime(last_updated)
+        last_updated = await locate.localize_datetime(last_updated)
        stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
        stored_loc = loads(stored_loc_data)
        stored_lat = stored_loc.y
@@ -84,182 +83,180 @@ async def get_weather(date_time: datetime, latitude: float, longitude: float):

async def store_weather_to_db(date_time: datetime, weather_data: dict):
-    conn = await get_db_connection()
+    async with DB.get_connection() as conn:
        try:
            day_data = weather_data.get('days')[0]
            DEBUG(f"day_data.get('sunrise'): {day_data.get('sunrise')}")

            # Handle preciptype and stations as PostgreSQL arrays
            preciptype_array = day_data.get('preciptype', []) or []
            stations_array = day_data.get('stations', []) or []

            date_str = date_time.strftime("%Y-%m-%d")

            # Get location details from weather data if available
            longitude = weather_data.get('longitude')
            latitude = weather_data.get('latitude')
            elevation = locate.get_elevation(latitude, longitude) # 152.4 # default until we add a geocoder that can look up actual elevation; weather_data.get('elevation') # assuming 'elevation' key, replace if different
            location_point = f"POINTZ({longitude} {latitude} {elevation})" if longitude and latitude and elevation else None

            # Correct for the datetime objects
-            day_data['datetime'] = localize_datetime(day_data.get('datetime')) #day_data.get('datetime'))
+            day_data['datetime'] = await locate.localize_datetime(day_data.get('datetime')) #day_data.get('datetime'))
            day_data['sunrise'] = day_data['datetime'].replace(hour=int(day_data.get('sunrise').split(':')[0]), minute=int(day_data.get('sunrise').split(':')[1]))
            day_data['sunset'] = day_data['datetime'].replace(hour=int(day_data.get('sunset').split(':')[0]), minute=int(day_data.get('sunset').split(':')[1]))

            daily_weather_params = (
                day_data.get('sunrise'), day_data.get('sunriseEpoch'),
                day_data.get('sunset'), day_data.get('sunsetEpoch'),
                day_data.get('description'), day_data.get('tempmax'),
                day_data.get('tempmin'), day_data.get('uvindex'),
                day_data.get('winddir'), day_data.get('windspeed'),
                day_data.get('icon'), datetime.now(),
                day_data.get('datetime'), day_data.get('datetimeEpoch'),
                day_data.get('temp'), day_data.get('feelslikemax'),
                day_data.get('feelslikemin'), day_data.get('feelslike'),
                day_data.get('dew'), day_data.get('humidity'),
                day_data.get('precip'), day_data.get('precipprob'),
                day_data.get('precipcover'), preciptype_array,
                day_data.get('snow'), day_data.get('snowdepth'),
                day_data.get('windgust'), day_data.get('pressure'),
                day_data.get('cloudcover'), day_data.get('visibility'),
                day_data.get('solarradiation'), day_data.get('solarenergy'),
                day_data.get('severerisk', 0), day_data.get('moonphase'),
                day_data.get('conditions'), stations_array, day_data.get('source'),
                location_point
            )
        except Exception as e:
            ERR(f"Failed to prepare database query in store_weather_to_db! {e}")

        try:
            daily_weather_query = '''
            INSERT INTO DailyWeather (
                sunrise, sunriseEpoch, sunset, sunsetEpoch, description,
                tempmax, tempmin, uvindex, winddir, windspeed, icon, last_updated,
                datetime, datetimeEpoch, temp, feelslikemax, feelslikemin, feelslike,
                dew, humidity, precip, precipprob, precipcover, preciptype,
                snow, snowdepth, windgust, pressure, cloudcover, visibility,
                solarradiation, solarenergy, severerisk, moonphase, conditions,
                stations, source, location
            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38)
            RETURNING id
            '''

            # Debug logs for better insights
            # DEBUG("Executing query: %s", daily_weather_query)
            # DEBUG("With parameters: %s", daily_weather_params)

            # Execute the query to insert daily weather data
            async with conn.transaction():
                daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params)

                if 'hours' in day_data:
                    for hour_data in day_data['hours']:
                        try:
                            await asyncio.sleep(0.1)
                            # hour_data['datetime'] = parse_date(hour_data.get('datetime'))
                            hour_timestamp = date_str + ' ' + hour_data['datetime']
-                            hour_data['datetime'] = localize_datetime(hour_timestamp)
+                            hour_data['datetime'] = await locate.localize_datetime(hour_timestamp)
                            DEBUG(f"Processing hours now...")
                            # DEBUG(f"Processing {hour_data['datetime']}")

                            hour_preciptype_array = hour_data.get('preciptype', []) or []
                            hour_stations_array = hour_data.get('stations', []) or []
                            hourly_weather_params = (
                                daily_weather_id,
                                hour_data['datetime'],
                                hour_data.get('datetimeEpoch'),
                                hour_data['temp'],
                                hour_data['feelslike'],
                                hour_data['humidity'],
                                hour_data['dew'],
                                hour_data['precip'],
                                hour_data['precipprob'],
                                hour_preciptype_array,
                                hour_data['snow'],
                                hour_data['snowdepth'],
                                hour_data['windgust'],
                                hour_data['windspeed'],
                                hour_data['winddir'],
                                hour_data['pressure'],
                                hour_data['cloudcover'],
                                hour_data['visibility'],
                                hour_data['solarradiation'],
                                hour_data['solarenergy'],
                                hour_data['uvindex'],
                                hour_data.get('severerisk', 0),
                                hour_data['conditions'],
                                hour_data['icon'],
                                hour_stations_array,
                                hour_data.get('source', ''),
                            )

                            try:
                                hourly_weather_query = '''
                                INSERT INTO HourlyWeather (daily_weather_id, datetime, datetimeEpoch, temp, feelslike, humidity, dew, precip, precipprob,
                                preciptype, snow, snowdepth, windgust, windspeed, winddir, pressure, cloudcover, visibility, solarradiation, solarenergy,
                                uvindex, severerisk, conditions, icon, stations, source)
                                VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26)
                                RETURNING id
                                '''
                                # Debug logs for better insights
                                # DEBUG("Executing query: %s", hourly_weather_query)
                                # DEBUG("With parameters: %s", hourly_weather_params)

                                # Execute the query to insert hourly weather data
                                async with conn.transaction():
                                    hourly_weather_id = await conn.fetchval(hourly_weather_query, *hourly_weather_params)
                                    # ERR(f"\n{hourly_weather_id}")
                            except Exception as e:
                                ERR(f"EXCEPTION: {e}")
                        except Exception as e:
                            ERR(f"EXCEPTION: {e}")
            return "SUCCESS"
        except Exception as e:
            ERR(f"Error in dailyweather storage: {e}")


async def get_weather_from_db(date_time: datetime, latitude: float, longitude: float):
-    conn = await get_db_connection()
+    async with DB.get_connection() as conn:
        query_date = date_time.date()
        try:
            # Query to get daily weather data
            query = '''
            SELECT DW.* FROM DailyWeather DW
            WHERE DW.datetime::date = $1
            AND ST_DWithin(DW.location::geography, ST_MakePoint($2,$3)::geography, 8046.72)
            ORDER BY ST_Distance(DW.location, ST_MakePoint($4, $5)::geography) ASC
            LIMIT 1
            '''

            daily_weather_data = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude)

            if daily_weather_data is None:
                DEBUG(f"No daily weather data retrieved from database.")
                return None
            # else:
            #     DEBUG(f"Daily_weather_data: {daily_weather_data}")

            # Query to get hourly weather data
            query = '''
            SELECT HW.* FROM HourlyWeather HW
            WHERE HW.daily_weather_id = $1
            '''
            hourly_weather_data = await conn.fetch(query, daily_weather_data['id'])

            day: Dict = {
                'DailyWeather': dict(daily_weather_data),
                'HourlyWeather': [dict(row) for row in hourly_weather_data],
            }
            # DEBUG(f"day: {day}")
            return day
        except Exception as e:
            ERR(f"Unexpected error occurred: {e}")
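`DB.get_connection()` comes from the new `Database` class referenced in `sijapi.classes`, whose implementation is not part of this hunk. A minimal sketch of what such a helper might look like, assuming an asyncpg connection pool, is:

```python
import asyncpg
from contextlib import asynccontextmanager
from typing import Optional

class Database:
    """Hypothetical shape of the Database helper; the real class lives in sijapi.classes."""

    def __init__(self, dsn: str):
        self.dsn = dsn
        self.pool: Optional[asyncpg.Pool] = None

    @asynccontextmanager
    async def get_connection(self):
        # Lazily create a connection pool, then lend one connection per caller.
        if self.pool is None:
            self.pool = await asyncpg.create_pool(self.dsn)
        async with self.pool.acquire() as conn:
            yield conn
```

A caller would then write `async with DB.get_connection() as conn:` and use `conn.fetchval` / `conn.fetchrow` exactly as the weather router does above.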

View file

@@ -17,6 +17,8 @@ from datetime import datetime, date, time
from typing import Optional, Union, Tuple
import asyncio
from PIL import Image
+import pandas as pd
+from scipy.spatial import cKDTree
from dateutil.parser import parse as dateutil_parse
from docx import Document
import asyncpg
@@ -24,7 +26,7 @@ from sshtunnel import SSHTunnelForwarder
from fastapi import Depends, HTTPException, Request, UploadFile
from fastapi.security.api_key import APIKeyHeader
from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL
-from sijapi import DB, GLOBAL_API_KEY, DB, DB_HOST, DB_PORT, DB_USER, DB_PASS, TZ, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_FILENAME_LENGTH
+from sijapi import GLOBAL_API_KEY, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_FILENAME_LENGTH

api_key_header = APIKeyHeader(name="Authorization")
@@ -141,64 +143,38 @@ def sanitize_filename(text, max_length=MAX_FILENAME_LENGTH):
    """Sanitize a string to be used as a safe filename while protecting the file extension."""
    DEBUG(f"Filename before sanitization: {text}")
-    # Replace multiple spaces with a single space and remove other whitespace
    text = re.sub(r'\s+', ' ', text)
-    # Remove any non-word characters except space, dot, and hyphen
    sanitized = re.sub(ALLOWED_FILENAME_CHARS, '', text)
-    # Remove leading/trailing spaces
    sanitized = sanitized.strip()
-    # Split the filename into base name and extension
    base_name, extension = os.path.splitext(sanitized)
-    # Calculate the maximum length for the base name
    max_base_length = max_length - len(extension)
-    # Truncate the base name if necessary
    if len(base_name) > max_base_length:
        base_name = base_name[:max_base_length].rstrip()
-    # Recombine the base name and extension
    final_filename = base_name + extension
-    # In case the extension itself is too long, truncate the entire filename
-    if len(final_filename) > max_length:
-        final_filename = final_filename[:max_length]
    DEBUG(f"Filename after sanitization: {final_filename}")
    return final_filename

def check_file_name(file_name, max_length=255):
    """Check if the file name needs sanitization based on the criteria of the second sanitize_filename function."""
-    DEBUG(f"Checking filename: {file_name}")
    needs_sanitization = False
-    # Check for length
    if len(file_name) > max_length:
-        DEBUG(f"Filename exceeds maximum length of {max_length}")
+        DEBUG(f"Filename exceeds maximum length of {max_length}: {file_name}")
        needs_sanitization = True
-    # Check for non-word characters (except space, dot, and hyphen)
    if re.search(ALLOWED_FILENAME_CHARS, file_name):
-        DEBUG("Filename contains non-word characters (except space, dot, and hyphen)")
+        DEBUG(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}")
        needs_sanitization = True
-    # Check for multiple consecutive spaces
    if re.search(r'\s{2,}', file_name):
-        DEBUG("Filename contains multiple consecutive spaces")
+        DEBUG(f"Filename contains multiple consecutive spaces: {file_name}")
        needs_sanitization = True
-    # Check for leading/trailing spaces
    if file_name != file_name.strip():
-        DEBUG("Filename has leading or trailing spaces")
+        DEBUG(f"Filename has leading or trailing spaces: {file_name}")
        needs_sanitization = True
-    DEBUG(f"Filename {'needs' if needs_sanitization else 'does not need'} sanitization")
    return needs_sanitization
@@ -381,49 +357,6 @@ def convert_to_unix_time(iso_date_str):
    return int(dt.timestamp())

-async def get_db_connection():
-    conn = await asyncpg.connect(
-        database=DB,
-        user=DB_USER,
-        password=DB_PASS,
-        host=DB_HOST,
-        port=DB_PORT
-    )
-    return conn
-
-temp = """
-def get_db_connection_ssh(ssh: bool = True):
-    if ssh:
-        with SSHTunnelForwarder(
-            (DB_SSH, 22),
-            DB_SSH_USER=DB_SSH_USER,
-            DB_SSH_PASS=DB_SSH_PASS,
-            remote_bind_address=DB_SSH,
-            local_bind_address=(DB_HOST, DB_PORT)
-        ) as tunnel:
-            conn = psycopg2.connect(
-                dbname=DB,
-                user=DB_USER,
-                password=DB_PASS,
-                host=DB_HOST,
-                port=DB_PORT
-            )
-    else:
-        conn = psycopg2.connect(
-            dbname=DB,
-            user=DB_USER,
-            password=DB_PASS,
-            host=DB_HOST,
-            port=DB_PORT
-        )
-    return conn
-"""
-
-def db_localized():
-    # ssh = True if TS_IP == DB_SSH else False
-    return get_db_connection()
def haversine(lat1, lon1, lat2, lon2):
    """ Calculate the great circle distance between two points on the earth specified in decimal degrees. """
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])

@@ -445,30 +378,6 @@ def convert_degrees_to_cardinal(d):
    return dirs[ix % len(dirs)]
-def localize_datetime(dt):
-    initial_dt = dt
-    try:
-        if isinstance(dt, str):
-            dt = dateutil_parse(dt)
-            DEBUG(f"{initial_dt} was a string so we attempted converting to datetime. Result: {dt}")
-
-        if isinstance(dt, datetime):
-            DEBUG(f"{dt} is a datetime object, so we will ensure it is tz-aware.")
-            if dt.tzinfo is None:
-                dt = dt.replace(tzinfo=TZ)
-                # DEBUG(f"{dt} should now be tz-aware. Returning it now.")
-                return dt
-            else:
-                # DEBUG(f"{dt} already was tz-aware. Returning it now.")
-                return dt
-        else:
-            ERR(f"Conversion failed")
-            raise TypeError("Conversion failed")
-    except Exception as e:
-        ERR(f"Error parsing datetime: {e}")
-        raise TypeError("Input must be a string or datetime object")
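Callers elsewhere in this commit now use `await locate.localize_datetime(...)`, so the replacement presumably lives in the locate router as a coroutine. Its body is not shown in this diff; a minimal async sketch with the same parse-then-attach-timezone behavior (the default timezone below is a stand-in, not the project's configuration) could be:

```python
from datetime import datetime
from dateutil import tz
from dateutil.parser import parse as dateutil_parse

DEFAULT_TZ = tz.gettz("America/Los_Angeles")  # stand-in for the configured timezone

async def localize_datetime(dt) -> datetime:
    """Parse strings and attach a timezone to naive datetimes (illustrative sketch)."""
    if isinstance(dt, str):
        dt = dateutil_parse(dt)
    if not isinstance(dt, datetime):
        raise TypeError("Input must be a string or datetime object")
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=DEFAULT_TZ)
    return dt
```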
HOURLY_COLUMNS_MAPPING = {
    "12am": "00:00:00",
@@ -531,4 +440,22 @@ def resize_and_convert_image(image_path, max_size=2160, quality=80):
        img.save(img_byte_arr, format='JPEG', quality=quality)
        img_byte_arr = img_byte_arr.getvalue()
    return img_byte_arr
+def load_geonames_data(path: str):
+    columns = ['geonameid', 'name', 'asciiname', 'alternatenames',
+               'latitude', 'longitude', 'feature_class', 'feature_code',
+               'country_code', 'cc2', 'admin1_code', 'admin2_code', 'admin3_code',
+               'admin4_code', 'population', 'elevation', 'dem', 'timezone', 'modification_date']
+
+    data = pd.read_csv(
+        path,
+        sep='\t',
+        header=None,
+        names=columns,
+        low_memory=False
+    )
+
+    return data
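`load_geonames_data` only returns a DataFrame; the `cKDTree` import added above suggests it is meant to back a nearest-place lookup. A possible usage sketch (not code from this commit; Euclidean distance in degrees is used as a rough proximity proxy):

```python
import pandas as pd
from scipy.spatial import cKDTree

def build_place_index(geonames: pd.DataFrame) -> cKDTree:
    # Index places by (latitude, longitude); degree-space distance is only approximate,
    # but it is adequate for picking a nearby candidate row.
    return cKDTree(geonames[['latitude', 'longitude']].values)

def nearest_place(geonames: pd.DataFrame, tree: cKDTree, lat: float, lon: float) -> pd.Series:
    _, idx = tree.query([lat, lon])
    return geonames.iloc[idx]

# Example (assumes a geonames dump loaded via load_geonames_data):
# data = load_geonames_data("geonames.txt")
# tree = build_place_index(data)
# print(nearest_place(data, tree, 45.5, -122.6)['name'])
```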