Auto-update: Tue Jun 25 17:42:57 PDT 2024

This commit is contained in:
sanj 2024-06-25 17:42:57 -07:00
parent a60698fc1f
commit dee4d83bdf
17 changed files with 2 additions and 3358 deletions

2
.gitignore vendored
View file

@ -13,6 +13,8 @@ sijapi/config/*.yaml
sijapi/config/O365/
sijapi/local_only/
sijapi/testbed/
khoj/
r2r/
**/.env
**/.config.yaml

1
r2r

@ -1 +0,0 @@
Subproject commit c9e7c04a6bf9f8156cf793ee23379eb0f92f2d38

View file

@ -1,195 +0,0 @@
from fastapi import FastAPI, Request, BackgroundTasks, HTTPException, status
from fastapi.responses import JSONResponse
import httpx
import json
from pathlib import Path
import asyncio
from datetime import datetime
import os, io
from PyPDF2 import PdfReader
import aiohttp
import logging
logging.basicConfig(level=logging.INFO)  # basic logger so the logging.info calls below are visible
hook = FastAPI()
# /Users/sij/Library/CloudStorage/OneDrive-WELC/Documents - WELC-Docket
SYNC_FOLDER = Path(__file__).resolve().parent.parent
HOME_FOLDER = Path.home()
DOCKETS_FOLDER = HOME_FOLDER / "Dockets"
SEARCH_FOLDER = HOME_FOLDER / "Watched Cases"
SCRIPTS_FOLDER = SYNC_FOLDER / ".scripts"
REQUESTS_FOLDER = HOME_FOLDER / "sync" / "requests"
COURTLISTENER_BASE_URL = "https://www.courtlistener.com"
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
COURTLISTENER_API_KEY = "efb5fe00f3c6c88d65a32541260945befdf53a7e"
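# caseTable.json maps CourtListener docket IDs to local case codes and short names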
with open(SCRIPTS_FOLDER / 'caseTable.json', 'r') as file:
CASE_TABLE = json.load(file)
@hook.get("/health")
async def health():
return {"status": "ok"}
@hook.post("/cl/docket")
async def respond(request: Request, background_tasks: BackgroundTasks):
client_ip = request.client.host
logging.info(f"Received request from IP: {client_ip}")
data = await request.json()
payload = data['payload']
results = data['payload']['results']
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = REQUESTS_FOLDER / f"{timestamp}-{client_ip}_docket.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
background_tasks.add_task(process_docket, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
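# Background task: fetch docket metadata from CourtListener and download each attached RECAP document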
async def process_docket(result):
async with httpx.AsyncClient() as session:
await process_docket_result(result, session)
async def process_docket_result(result, session):
docket = str(result.get('docket'))
case_code, case_shortname = get_case_details(docket)
date_filed = result.get('date_filed', 'No Date Filed')
try:
date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d')
except ValueError:
date_filed_formatted = 'NoDateFiled'
# Fetching court docket information from the API
url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status == 200:
logging.info(f"Fetching CourtListener docket information for {docket}...")
data = await response.json()
court_docket = data['results'][0]['docket_number_core']
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
case_name = data['results'][0]['case_name']
logging.info(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
else:
logging.info("Failed to fetch data from CourtListener API.")
court_docket = 'NoCourtDocket'
case_name = 'NoCaseName'
for document in result.get('recap_documents', []):
filepath_ia = document.get('filepath_ia')
filepath_local = document.get('filepath_local')
if filepath_ia:
file_url = filepath_ia
logging.info(f"Found IA file at {file_url}.")
elif filepath_local:
file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
logging.info(f"Found local file at {file_url}.")
else:
logging.info(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
continue
document_number = document.get('document_number', 'NoDocumentNumber')
description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_")
description = description[:50] # Truncate description
# case_shortname = case_name # TEMPORARY OVERRIDE
file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
target_path = Path(DOCKETS_FOLDER) / case_shortname / "Docket" / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await download_file(file_url, target_path, session)
logging.info(f"Downloaded {file_name} to {target_path}")
def get_case_details(docket):
case_info = CASE_TABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"})
case_code = case_info.get("code")
short_name = case_info.get("shortname")
return case_code, short_name
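# Download a file to disk, skipping it unless the response is a valid PDF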
async def download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
}
async with aiohttp.ClientSession() as session:
logging.info(f"Attempting to download {url} to {path}.")
try:
async with session.get(url, headers=headers, allow_redirects=True) as response:
if response.status == 403:
logging.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
return
response.raise_for_status()
# Check if the response content type is a PDF
content_type = response.headers.get('Content-Type')
if content_type != 'application/pdf':
logging.error(f"Invalid content type: {content_type}. Skipping download.")
return
# Create an in-memory buffer to store the downloaded content
buffer = io.BytesIO()
async for chunk in response.content.iter_chunked(1024):
buffer.write(chunk)
# Reset the buffer position to the beginning
buffer.seek(0)
# Validate the downloaded PDF content
try:
PdfReader(buffer)
except Exception as e:
logging.error(f"Invalid PDF content: {str(e)}. Skipping download.")
return
# If the PDF is valid, write the content to the file on disk
path.parent.mkdir(parents=True, exist_ok=True)
with path.open('wb') as file:
file.write(buffer.getvalue())
except Exception as e:
logging.error(f"Error downloading file: {str(e)}")
@hook.post("/cl/search")
async def respond_search(request: Request, background_tasks: BackgroundTasks):
client_ip = request.client.host
logging.info(f"Received request from IP: {client_ip}")
data = await request.json()
payload = data['payload']
results = data['payload']['results']
# Save the payload data
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = REQUESTS_FOLDER / f"{timestamp}-{client_ip}_search.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
background_tasks.add_task(process_search_result, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
async def process_search_result(result):
async with httpx.AsyncClient() as session:
download_url = result.get('download_url')
court_id = result.get('court_id')
case_name_short = result.get('caseNameShort')
case_name = result.get('caseName')
logging.info(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")
court_folder = court_id
if case_name_short:
case_folder = case_name_short
else:
case_folder = case_name
file_name = download_url.split('/')[-1]
target_path = Path(SEARCH_FOLDER) / court_folder / case_folder / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await download_file(download_url, target_path, session)
logging.info(f"Downloaded {file_name} to {target_path}")

View file

@ -1,32 +0,0 @@
import json
import requests
# Load the caseTable.json file
with open('caseTable.json', 'r') as file:
case_table = json.load(file)
# Set the base URL and authorization token
base_url = "https://www.courtlistener.com/api/rest/v3/docket-alerts/"
auth_token = "a90d3f2de489aa4138a32133ca8bfec9d85fecfa"
# Iterate through each key (docket ID) in the case table
for docket_id in case_table.keys():
# Set the data payload and headers for the request
data = {'docket': docket_id}
headers = {'Authorization': f'Token {auth_token}'}
try:
# Send the POST request to the CourtListener API
response = requests.post(base_url, data=data, headers=headers)
# Check the response status code
if response.status_code == 200:
print(f"Successfully created docket alert for docket ID: {docket_id}")
else:
print(f"Failed to create docket alert for docket ID: {docket_id}")
print(f"Status code: {response.status_code}")
print(f"Response content: {response.content}")
except requests.exceptions.RequestException as e:
print(f"Error occurred while creating docket alert for docket ID: {docket_id}")
print(f"Error message: {str(e)}")

View file

@ -1,17 +0,0 @@
#!/bin/bash
# Iterate over every day of July 2024 (01 to 31)
for i in $(seq -w 01 31); do
# Construct the date string
DATE="2024-07-${i}"
# Print the date being processed (optional)
echo "Processing date: $DATE"
# Run the curl command
curl -X POST -H "Content-Type: application/json" -d '{"mood": "joyful"}' "http://localhost:4444/note/banner?dt=$DATE"
# Wait for the curl command to finish before starting the next iteration
wait
done

View file

@ -1,47 +0,0 @@
# IMPORTANT: This is just here as a placeholder. It will not work. Export your own widget.shell file from Secure Shellfish to enable the alert and widget functionalities.
# Updates watch complications for Secure ShellFish
#
# This command sends encrypted data through push notifications such
# that it doesn't need to run from a Secure ShellFish terminal.
if [[ $# -eq 0 ]]; then
cat <<EOF
# Usage: widget [target] <data> ...
# Updates the complication on the device this function was installed from, using content parameters that can be a string, progress, icon, target, or color.
# Each argument type is derived from input.
# Progress has the form: 50% or 110/220
# Icon must match valid SF Symbol name such as globe or terminal.fill
# Colors must be hex colours such as #000 #ff00ff where the color is used for later content and 'foreground' switches back to default colour
# Target sends different content to different complications, after each complication has been configured with its own target identifier (requires the pro unlock). A target is only applied when --target is given and stays in effect until the next --target parameter, so several complications can be updated with a single command.
# You can configure complications to only show content for a given target.
# String is the fallback type if nothing else matches, but content type can be forced for next parameter with --progress, --icon, --color, --text or --target with
# something like:
widget --text "50/100"
# You can update several complications at once by using --target to send all parameters until the next --target to a particular complication. Updating several complications at once allows more total updates per day.
# EOF
# return 0
# fi
# local key=SECRET
# local user=SECRET
# local iv=SECRET
# local plain=$(
# echo Secure ShellFish Widget 2.0
# for var in "$@"
# do
# echo -ne "$var" | base64
# done)
# local base64=$(echo "$plain" | openssl enc -aes-256-cbc -base64 -K $key -iv $iv)
# curl -sS -X POST -H "Content-Type: text/plain" --data "$base64" "https://secureshellfish.app/push/?user=$user"

View file

@ -1,195 +0,0 @@
from fastapi import FastAPI, Request, BackgroundTasks, HTTPException, status
from fastapi.responses import JSONResponse
import httpx
import json
from pathlib import Path
import asyncio
from datetime import datetime
import os, io
from PyPDF2 import PdfReader
import aiohttp
import logging
logging.basicConfig(level=logging.INFO)  # basic logger so the logging.info calls below are visible
hook = FastAPI()
# /Users/sij/Library/CloudStorage/OneDrive-WELC/Documents - WELC-Docket
SYNC_FOLDER = Path(__file__).resolve().parent.parent
HOME_FOLDER = Path.home()
DOCKETS_FOLDER = HOME_FOLDER / "Dockets"
SEARCH_FOLDER = HOME_FOLDER / "Watched Cases"
SCRIPTS_FOLDER = SYNC_FOLDER / ".scripts"
REQUESTS_FOLDER = HOME_FOLDER / "sync" / "requests"
COURTLISTENER_BASE_URL = "https://www.courtlistener.com"
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
COURTLISTENER_API_KEY = "efb5fe00f3c6c88d65a32541260945befdf53a7e"
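# caseTable.json maps CourtListener docket IDs to local case codes and short names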
with open(SCRIPTS_FOLDER / 'caseTable.json', 'r') as file:
CASE_TABLE = json.load(file)
@hook.get("/health")
async def health():
return {"status": "ok"}
@hook.post("/cl/docket")
async def respond(request: Request, background_tasks: BackgroundTasks):
client_ip = request.client.host
logging.info(f"Received request from IP: {client_ip}")
data = await request.json()
payload = data['payload']
results = data['payload']['results']
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = REQUESTS_FOLDER / f"{timestamp}-{client_ip}_docket.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
background_tasks.add_task(process_docket, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
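# Background task: fetch docket metadata from CourtListener and download each attached RECAP document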
async def process_docket(result):
async with httpx.AsyncClient() as session:
await process_docket_result(result, session)
async def process_docket_result(result, session):
docket = str(result.get('docket'))
case_code, case_shortname = get_case_details(docket)
date_filed = result.get('date_filed', 'No Date Filed')
try:
date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d')
except ValueError:
date_filed_formatted = 'NoDateFiled'
# Fetching court docket information from the API
url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status == 200:
logging.info(f"Fetching CourtListener docket information for {docket}...")
data = await response.json()
court_docket = data['results'][0]['docket_number_core']
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
case_name = data['results'][0]['case_name']
logging.info(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
else:
logging.info("Failed to fetch data from CourtListener API.")
court_docket = 'NoCourtDocket'
case_name = 'NoCaseName'
for document in result.get('recap_documents', []):
filepath_ia = document.get('filepath_ia')
filepath_local = document.get('filepath_local')
if filepath_ia:
file_url = filepath_ia
logging.info(f"Found IA file at {file_url}.")
elif filepath_local:
file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
logging.info(f"Found local file at {file_url}.")
else:
logging.info(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
continue
document_number = document.get('document_number', 'NoDocumentNumber')
description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_")
description = description[:50] # Truncate description
# case_shortname = case_name # TEMPORARY OVERRIDE
file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
target_path = Path(DOCKETS_FOLDER) / case_shortname / "Docket" / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await download_file(file_url, target_path, session)
logging.info(f"Downloaded {file_name} to {target_path}")
def get_case_details(docket):
case_info = CASE_TABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"})
case_code = case_info.get("code")
short_name = case_info.get("shortname")
return case_code, short_name
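# Download a file to disk, skipping it unless the response is a valid PDF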
async def download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
}
async with aiohttp.ClientSession() as session:
logging.info(f"Attempting to download {url} to {path}.")
try:
async with session.get(url, headers=headers, allow_redirects=True) as response:
if response.status == 403:
logging.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
return
response.raise_for_status()
# Check if the response content type is a PDF
content_type = response.headers.get('Content-Type')
if content_type != 'application/pdf':
logging.error(f"Invalid content type: {content_type}. Skipping download.")
return
# Create an in-memory buffer to store the downloaded content
buffer = io.BytesIO()
async for chunk in response.content.iter_chunked(1024):
buffer.write(chunk)
# Reset the buffer position to the beginning
buffer.seek(0)
# Validate the downloaded PDF content
try:
PdfReader(buffer)
except Exception as e:
logging.error(f"Invalid PDF content: {str(e)}. Skipping download.")
return
# If the PDF is valid, write the content to the file on disk
path.parent.mkdir(parents=True, exist_ok=True)
with path.open('wb') as file:
file.write(buffer.getvalue())
except Exception as e:
logging.error(f"Error downloading file: {str(e)}")
@hook.post("/cl/search")
async def respond_search(request: Request, background_tasks: BackgroundTasks):
client_ip = request.client.host
logging.info(f"Received request from IP: {client_ip}")
data = await request.json()
payload = data['payload']
results = data['payload']['results']
# Save the payload data
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
payload_file = REQUESTS_FOLDER / f"{timestamp}-{client_ip}_search.json"
with open(payload_file, 'w') as file:
json.dump(payload, file, indent=2)
for result in results:
background_tasks.add_task(process_search_result, result)
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
async def process_search_result(result):
async with httpx.AsyncClient() as session:
download_url = result.get('download_url')
court_id = result.get('court_id')
case_name_short = result.get('caseNameShort')
case_name = result.get('caseName')
logging.info(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")
court_folder = court_id
if case_name_short:
case_folder = case_name_short
else:
case_folder = case_name
file_name = download_url.split('/')[-1]
target_path = Path(SEARCH_FOLDER) / court_folder / case_folder / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await download_file(download_url, target_path, session)
logging.info(f"Downloaded {file_name} to {target_path}")

View file

@ -1,32 +0,0 @@
import json
import requests
# Load the caseTable.json file
with open('caseTable.json', 'r') as file:
case_table = json.load(file)
# Set the base URL and authorization token
base_url = "https://www.courtlistener.com/api/rest/v3/docket-alerts/"
auth_token = "a90d3f2de489aa4138a32133ca8bfec9d85fecfa"
# Iterate through each key (docket ID) in the case table
for docket_id in case_table.keys():
# Set the data payload and headers for the request
data = {'docket': docket_id}
headers = {'Authorization': f'Token {auth_token}'}
try:
# Send the POST request to the CourtListener API
response = requests.post(base_url, data=data, headers=headers)
# Check the response status code
if response.status_code == 200:
print(f"Successfully created docket alert for docket ID: {docket_id}")
else:
print(f"Failed to create docket alert for docket ID: {docket_id}")
print(f"Status code: {response.status_code}")
print(f"Response content: {response.content}")
except requests.exceptions.RequestException as e:
print(f"Error occurred while creating docket alert for docket ID: {docket_id}")
print(f"Error message: {str(e)}")

View file

@ -1,146 +0,0 @@
#!/bin/bash
DB_NAME="weatherlocate.db"
# Step 1: Backup existing data
echo "Backing up existing data..."
sqlite3 $DB_NAME <<EOF
.headers on
.mode csv
.output hourly_weather_backup.csv
SELECT * FROM HourlyWeather;
.output daily_weather_backup.csv
SELECT * FROM DailyWeather;
.output hours_backup.csv
SELECT * FROM Hours;
.output days_backup.csv
SELECT * FROM Days;
EOF
# Step 2: Drop and recreate tables
echo "Dropping and recreating tables..."
sqlite3 $DB_NAME <<EOF
DROP TABLE IF EXISTS HourlyWeather;
DROP TABLE IF EXISTS DailyWeather;
DROP TABLE IF EXISTS Hours;
DROP TABLE IF EXISTS Days;
CREATE TABLE HourlyWeather (
id INTEGER PRIMARY KEY,
datetime TEXT NOT NULL,
temp REAL,
feelslike REAL,
humidity REAL,
dew REAL,
precip REAL,
precipprob REAL,
snow REAL,
snowdepth REAL,
windgust REAL,
windspeed REAL,
winddir REAL,
pressure REAL,
cloudcover REAL,
visibility REAL,
solarradiation REAL,
solarenergy REAL,
uvindex REAL,
severerisk REAL,
conditions TEXT,
icon TEXT,
last_updated TEXT DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE DailyWeather (
id INTEGER PRIMARY KEY,
sunrise_time TEXT,
sunset_time TEXT,
description TEXT,
tempmax REAL,
tempmin REAL,
uvindex REAL,
winddir REAL,
windspeedmean REAL,
windspeed REAL,
icon TEXT,
last_updated TEXT DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE Hours (
id INTEGER PRIMARY KEY,
day_id INTEGER,
hour INTEGER,
hourly_weather_id INTEGER,
FOREIGN KEY (day_id) REFERENCES Days(id),
FOREIGN KEY (hourly_weather_id) REFERENCES HourlyWeather(id)
);
CREATE TABLE Days (
id INTEGER PRIMARY KEY,
date TEXT NOT NULL,
daily_weather_id INTEGER,
FOREIGN KEY (daily_weather_id) REFERENCES DailyWeather(id)
);
EOF
# Step 3: Import data from backup files
echo "Importing data from backup files..."
python3 <<EOF
import sqlite3
import csv
from datetime import datetime
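# Re-insert the backed-up CSV rows into the freshly created tables (SQLite assigns new ids)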
def import_data():
conn = sqlite3.connect('$DB_NAME')
cursor = conn.cursor()
with open('hourly_weather_backup.csv', 'r') as file:
reader = csv.DictReader(file)
for row in reader:
cursor.execute('''
INSERT INTO HourlyWeather (datetime, temp, feelslike, humidity, dew, precip, precipprob, snow, snowdepth, windgust, windspeed, winddir, pressure, cloudcover, visibility, solarradiation, solarenergy, uvindex, severerisk, conditions, icon, last_updated)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
row['datetime'], row['temp'], row['feelslike'], row['humidity'], row['dew'], row['precip'],
row['precipprob'], row['snow'], row['snowdepth'], row['windgust'], row['windspeed'], row['winddir'],
row['pressure'], row['cloudcover'], row['visibility'], row['solarradiation'], row['solarenergy'], row['uvindex'],
row['severerisk'], row['conditions'], row['icon'],
datetime.strptime(row['last_updated'], '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H:%M:%S')
))
with open('daily_weather_backup.csv', 'r') as file:
reader = csv.DictReader(file)
for row in reader:
cursor.execute('''
INSERT INTO DailyWeather (sunrise_time, sunset_time, description, tempmax, tempmin, uvindex, winddir, windspeedmean, windspeed, icon, last_updated)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
row['sunrise_time'], row['sunset_time'], row['description'], row['tempmax'], row['tempmin'],
row['uvindex'], row['winddir'], row['windspeedmean'], row['windspeed'], row['icon'],
datetime.strptime(row['last_updated'], '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H:%M:%S')
))
with open('hours_backup.csv', 'r') as file:
reader = csv.DictReader(file)
for row in reader:
cursor.execute('''
INSERT INTO Hours (day_id, hour, hourly_weather_id)
VALUES (?, ?, ?)
''', (row['day_id'], row['hour'], row['hourly_weather_id']))
with open('days_backup.csv', 'r') as file:
reader = csv.DictReader(file)
for row in reader:
cursor.execute('''
INSERT INTO Days (date, daily_weather_id)
VALUES (?, ?)
''', (row['date'], row['daily_weather_id']))
conn.commit()
conn.close()
import_data()
EOF
echo "Database rebuild complete."

View file

@ -1,123 +0,0 @@
import sqlite3
from pathlib import Path
# Get the home directory
home_dir = Path.home()
# Define the path to the database
DB = home_dir / "sync" / "sijapi" / "data" / "weatherlocate.db"
def create_database():
with sqlite3.connect(DB) as conn:
cursor = conn.cursor()
# Create the Locations table
cursor.execute('''
CREATE TABLE IF NOT EXISTS Locations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
street TEXT,
city TEXT,
state TEXT,
country TEXT,
latitude REAL,
longitude REAL,
zip TEXT,
elevation REAL,
last_updated DATETIME
);
''')
# Create the Days table with a direct reference to DailyWeather
cursor.execute('''
CREATE TABLE IF NOT EXISTS Days (
id INTEGER PRIMARY KEY AUTOINCREMENT,
date DATE UNIQUE NOT NULL,
daily_weather_id INTEGER,
general_location_id INTEGER,
FOREIGN KEY(daily_weather_id) REFERENCES DailyWeather(id),
FOREIGN KEY(general_location_id) REFERENCES Locations(id)
);
''')
# Create the DailyWeather table with fields adjusted for direct CSV storage of preciptype
cursor.execute('''
CREATE TABLE IF NOT EXISTS DailyWeather (
id INTEGER PRIMARY KEY AUTOINCREMENT,
sunrise TEXT,
sunriseEpoch TEXT,
sunset TEXT,
sunsetEpoch TEXT,
description TEXT,
tempmax REAL,
tempmin REAL,
uvindex INTEGER,
winddir REAL,
windspeed REAL,
icon TEXT,
last_updated DATETIME,
datetime TEXT,
datetimeEpoch INTEGER,
temp REAL,
feelslikemax REAL,
feelslikemin REAL,
feelslike REAL,
dew REAL,
humidity REAL,
precip REAL,
precipprob REAL,
precipcover REAL,
preciptype TEXT,
snow REAL,
snowdepth REAL,
windgust REAL,
pressure REAL,
cloudcover REAL,
visibility REAL,
solarradiation REAL,
solarenergy REAL,
severerisk REAL,
moonphase REAL,
conditions TEXT,
stations TEXT,
source TEXT
);
''')
# Create the HourlyWeather table
cursor.execute('''
CREATE TABLE IF NOT EXISTS HourlyWeather (
id INTEGER PRIMARY KEY AUTOINCREMENT,
day_id INTEGER,
datetime TEXT,
datetimeEpoch INTEGER,
temp REAL,
feelslike REAL,
humidity REAL,
dew REAL,
precip REAL,
precipprob REAL,
snow REAL,
snowdepth REAL,
preciptype TEXT,
windgust REAL,
windspeed REAL,
winddir REAL,
pressure REAL,
cloudcover REAL,
visibility REAL,
solarradiation REAL,
solarenergy REAL,
uvindex REAL,
severerisk REAL,
conditions TEXT,
icon TEXT,
stations TEXT,
source TEXT,
FOREIGN KEY(day_id) REFERENCES Days(id)
);
''')
conn.commit()
if __name__ == "__main__":
create_database()

View file

@ -1,89 +0,0 @@
import osmium
import psycopg2
import json
from sijapi import DB_USER, DB_PASS, DB_HOST, DB, DATA_DIR
OSM_DATA_PATH = DATA_DIR / "north-america-latest.osm.pbf"
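# Streams the PBF extract and mirrors each node, way, and relation into the PostGIS tables created in main()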
class OSMHandler(osmium.SimpleHandler):
def __init__(self, conn):
osmium.SimpleHandler.__init__(self)
self.conn = conn
def node(self, n):
tags = {tag.k: tag.v for tag in n.tags}
cur = self.conn.cursor()
cur.execute("""
INSERT INTO nodes (id, location, tags)
VALUES (%s, ST_SetSRID(ST_MAKEPOINT(%s, %s),4326), %s)
""",
(n.id, n.location.lon, n.location.lat, json.dumps(tags)))
self.conn.commit()
def way(self, w):
nodes = [(node.lon, node.lat) for node in w.nodes]
tags = {tag.k: tag.v for tag in w.tags}
cur = self.conn.cursor()
cur.execute("""
INSERT INTO ways (id, nodes, tags)
VALUES (%s, %s, %s)
""",
(w.id, json.dumps(nodes), json.dumps(tags)))
self.conn.commit()
def relation(self, r):
members = [{"type": m.type, "ref": m.ref, "role": m.role} for m in r.members]
tags = {tag.k: tag.v for tag in r.tags}
cur = self.conn.cursor()
cur.execute("""
INSERT INTO relations (id, members, tags)
VALUES (%s, %s, %s)
""",
(r.id, json.dumps(members), json.dumps(tags)))
self.conn.commit()
def main():
conn = psycopg2.connect(user=DB_USER, password=DB_PASS, dbname=DB, host=DB_HOST)
cur = conn.cursor()
# Drop existing tables if they exist
cur.execute("DROP TABLE IF EXISTS nodes")
cur.execute("DROP TABLE IF EXISTS ways")
cur.execute("DROP TABLE IF EXISTS relations")
# Create tables for nodes, ways, and relations
cur.execute("""
CREATE TABLE nodes (
id bigint PRIMARY KEY,
location geography(POINT, 4326),
tags jsonb
)
""")
cur.execute("""
CREATE TABLE ways (
id bigint PRIMARY KEY,
nodes jsonb,
tags jsonb
)
""")
cur.execute("""
CREATE TABLE relations (
id bigint PRIMARY KEY,
members jsonb,
tags jsonb
)
""")
conn.commit()
handler = OSMHandler(conn)
handler.apply_file(str(OSM_DATA_PATH), locations=True)  # locations=True so way nodes expose lat/lon in the way() handler
cur.close()
conn.close()
if __name__ == "__main__":
main()

View file

@ -1,76 +0,0 @@
from vectordb import Memory
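# Persist embeddings to embedding.pt and chunk long texts with a sliding window (size 128, overlap 16)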
memory = Memory(memory_file="embedding.pt",
chunking_strategy={"mode": "sliding_window", "window_size": 128, "overlap": 16}, embeddings='TaylorAI/bge-micro-v2'
)
texts = [
"""
Machine learning is a method of data analysis that automates analytical model building.
It is a branch of artificial intelligence based on the idea that systems can learn from data,
identify patterns and make decisions with minimal human intervention.
Machine learning algorithms are trained on data sets that contain examples of the desired output. For example, a machine learning algorithm that is used to classify images might be trained on a data set that contains images of cats and dogs.
Once an algorithm is trained, it can be used to make predictions on new data. For example, the machine learning algorithm that is used to classify images could be used to predict whether a new image contains a cat or a dog.
Machine learning algorithms can be used to solve a wide variety of problems. Some common applications of machine learning include:
Classification: Categorizing data into different groups. For example, a machine learning algorithm could be used to classify emails as spam or not spam.
Regression: Predicting a continuous value. For example, a machine learning algorithm could be used to predict the price of a house.
Clustering: Finding groups of similar data points. For example, a machine learning algorithm could be used to find groups of customers with similar buying habits.
Anomaly detection: Finding data points that are different from the rest of the data. For example, a machine learning algorithm could be used to find fraudulent credit card transactions.
Machine learning is a powerful tool that can be used to solve a wide variety of problems. As the amount of data available continues to grow, machine learning is likely to become even more important in the future.
""",
"""
Artificial intelligence (AI) is the simulation of human intelligence in machines
that are programmed to think like humans and mimic their actions.
The term may also be applied to any machine that exhibits traits associated with
a human mind such as learning and problem-solving.
AI research has been highly successful in developing effective techniques for solving a wide range of problems, from game playing to medical diagnosis.
However, there is still a long way to go before AI can truly match the intelligence of humans. One of the main challenges is that human intelligence is incredibly complex and poorly understood.
Despite the challenges, AI is a rapidly growing field with the potential to revolutionize many aspects of our lives. Some of the potential benefits of AI include:
Increased productivity: AI can be used to automate tasks that are currently performed by humans, freeing up our time for more creative and fulfilling activities.
Improved decision-making: AI can be used to make more informed decisions, based on a wider range of data than humans can typically access.
Enhanced creativity: AI can be used to generate new ideas and solutions, beyond what humans can imagine on their own.
Of course, there are also potential risks associated with AI, such as:
Job displacement: As AI becomes more capable, it is possible that it will displace some human workers.
Weaponization: AI could be used to develop new weapons that are more powerful and destructive than anything we have today.
Loss of control: If AI becomes too powerful, we may lose control over it, with potentially disastrous consequences.
It is important to weigh the potential benefits and risks of AI carefully as we continue to develop this technology. With careful planning and oversight, AI has the potential to make the world a better place. However, if we are not careful, it could also lead to serious problems.
""",
]
metadata_list = [
{
"title": "Introduction to Machine Learning",
"url": "https://example.com/introduction-to-machine-learning",
},
{
"title": "Introduction to Artificial Intelligence",
"url": "https://example.com/introduction-to-artificial-intelligence",
},
]
memory.save(texts, metadata_list)
query = "What is the relationship between AI and machine learning?"
results = memory.search(query, top_n=3, unique=True)
print(results)
# Only two results come back even though top_n=3: unique=True deduplicates chunks from the same source text, and only two texts were saved

View file

@ -1,15 +0,0 @@
from vectordb import Memory
# Memory is where all content you want to store/search goes.
memory = Memory()
memory.save(
["apples are green", "oranges are orange"], # save your text content. for long text we will automatically chunk it
[{"url": "https://apples.com"}, {"url": "https://oranges.com"}], # associate any kind of metadata with it (optional)
)
# Search for top n relevant results, automatically using embeddings
query = "green"
results = memory.search(query, top_n = 1)
print(results)

View file

@ -1,17 +0,0 @@
#!/bin/bash
# Iterate over every day of July 2024 (01 to 31)
for i in $(seq -w 01 31); do
# Construct the date string
DATE="2024-07-${i}"
# Print the date being processed (optional)
echo "Processing date: $DATE"
# Run the curl command
curl -X POST -H "Content-Type: application/json" -d '{"mood": "joyful"}' "http://localhost:4444/note/banner?dt=$DATE"
# Wait for the curl command to finish before starting the next iteration
wait
done

File diff suppressed because it is too large