From 1a987996f62533032d21087d5bb164619164d1b9 Mon Sep 17 00:00:00 2001 From: sanj <67624670+iodrift@users.noreply.github.com> Date: Tue, 6 Aug 2024 19:55:48 -0700 Subject: [PATCH] Auto-update: Tue Aug 6 19:55:48 PDT 2024 --- .../README.md | 3 + .../archivist.js | 119 ++++++++++ Extras/Caddyfile/Caddyfile.example | 218 ++++++++++++++++++ Extras/Caddyfile/README.md | 1 + .../GPS.py | 63 +++++ .../README.md | 1 + .../uploadGPS.py | 1 + sijapi/__main__.py | 25 +- 8 files changed, 421 insertions(+), 10 deletions(-) create mode 100644 Extras/Archivist.js UserScript (web archiving)/README.md create mode 100644 Extras/Archivist.js UserScript (web archiving)/archivist.js create mode 100644 Extras/Caddyfile/Caddyfile.example create mode 100644 Extras/Caddyfile/README.md create mode 100644 Extras/GPS.py Pythonista Script (location tracking)/GPS.py create mode 100644 Extras/GPS.py Pythonista Script (location tracking)/README.md create mode 100644 Extras/GPS.py Pythonista Script (location tracking)/uploadGPS.py diff --git a/Extras/Archivist.js UserScript (web archiving)/README.md b/Extras/Archivist.js UserScript (web archiving)/README.md new file mode 100644 index 0000000..4f64498 --- /dev/null +++ b/Extras/Archivist.js UserScript (web archiving)/README.md @@ -0,0 +1,3 @@ +This is designed to work with UserScripts and similar browser extensions. Fill in the domain/URL where your sijapi instance is exposed (http://localhost:4444 is fine for the same device, but consider using a reverse proxy to extend to your mobile devices). + +And fill in your GLOBAL_API_KEY that you chose when configuring sijapi. 
diff --git a/Extras/Archivist.js UserScript (web archiving)/archivist.js b/Extras/Archivist.js UserScript (web archiving)/archivist.js new file mode 100644 index 0000000..0e91e5b --- /dev/null +++ b/Extras/Archivist.js UserScript (web archiving)/archivist.js @@ -0,0 +1,119 @@ +// ==UserScript== +// @name Archivist +// @version 0.5 +// @description archivist userscript posts to sij.ai/archive +// @author sij +// @match *://*/* +// @grant GM_xmlhttpRequest +// ==/UserScript== + +(function () { + "use strict"; + + // Function to check if the URL is likely an ad, tracker, or unwanted resource + function isUnwantedURL(url) { + const unwantedPatterns = [ + /doubleclick\.net/, + /googlesyndication\.com/, + /adservice\./, + /analytics\./, + /tracker\./, + /pixel\./, + /ad\d*\./, + /\.ad\./, + /ads\./, + /\/ads\//, + /url=http/, + /safeframe/, + /adsystem/, + /adserver/, + /adnetwork/, + /sync\./, + /beacon\./, + /optimizely/, + /outbrain/, + /widgets\./, + /cdn\./, + /pixel\?/, + /recaptcha/, + /accounts\.google\.com\/gsi/, + /imasdk\.googleapis\.com/, + /amplify-imp/, + /zemanta/, + /monitor\.html/, + /widgetMonitor/, + /nanoWidget/, + /client_storage/, + ]; + return unwantedPatterns.some((pattern) => pattern.test(url)); + } + + // Function to archive the page + function archivePage() { + var currentUrl = window.location.href; + + if (isUnwantedURL(currentUrl)) { + console.log("Skipping unwanted URL:", currentUrl); + return; + } + + var data = new URLSearchParams({ + title: document.title, + url: currentUrl, + referrer: document.referrer || "", + width: window.innerWidth ? 
window.innerWidth.toString() : "", + encoding: document.characterSet, + source: document.documentElement.outerHTML, + }); + + GM_xmlhttpRequest({ + method: "POST", + url: "https://api.sij.ai/archive?api_key=sk-NhrtQwCHNdK5sRZC", + headers: { + "Content-Type": "application/x-www-form-urlencoded", + Authorization: "bearer sk-NhrtQwCHNdK5sRZC", + }, + data: data.toString(), + onload: function (response) { + console.log("Archive request sent for:", currentUrl); + }, + onerror: function (error) { + console.error("Error sending archive request:", error); + }, + }); + } + + // Debounce function to limit how often archivePage can be called + function debounce(func, wait) { + let timeout; + return function executedFunction(...args) { + const later = () => { + clearTimeout(timeout); + func(...args); + }; + clearTimeout(timeout); + timeout = setTimeout(later, wait); + }; + } + + // Debounced version of archivePage + const debouncedArchivePage = debounce(archivePage, 2000); + + // Listen for navigation events + window.addEventListener("popstate", debouncedArchivePage); + + // Intercept link clicks + document.addEventListener( + "click", + function (e) { + var link = e.target.closest("a"); + if (link && !isUnwantedURL(link.href)) { + setTimeout(debouncedArchivePage, 1000); // Delay to allow page to load + } + }, + true + ); + + // Initial page load + setTimeout(archivePage, 5000); +})(); diff --git a/Extras/Caddyfile/Caddyfile.example b/Extras/Caddyfile/Caddyfile.example new file mode 100644 index 0000000..4feb2a2 --- /dev/null +++ b/Extras/Caddyfile/Caddyfile.example @@ -0,0 +1,218 @@ +{ + log { + # Specify path and log level for Caddy logs + output file /var/log/caddy/logfile.log + level INFO + } + + # replace `localhost` with an externally accessible IP address, e.g. a local LAN address or Tailscale IP. Take care not to use a publicly accessible IP address, as the Caddy API is not separately protected by API keys! 
+ admin localhost:2019 + + servers { + metrics + } + + # Replace with your email address for SSL certificate registration + email info@example.com +} + +# This is a highly permissive CORS config. Dial it back as your use case allows. +(cors) { + @cors_preflight method OPTIONS + header { + Access-Control-Allow-Origin "*" + Access-Control-Expose-Headers "Authorization" + Access-Control-Allow-Credentials "true" + Access-Control-Allow-Headers "Authorization, Content-Type" + } + + handle @cors_preflight { + header { + Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE" + Access-Control-Max-Age "3600" + } + respond "" 204 + } +} + +# Replace with the subdomain you want to expose your API over +api.example.com { + import cors + + # Specify which endpoints do not require an API key + @public { + path /img/* /oauth /oauth/* /MS365 /MS365/* /ip /health /health* /health/* /id /identity + } + + # Accept your GLOBAL_API_KEY (specified via environment variable in Caddy's context) via `Authorization: Bearer` header + @apiKeyAuthHeader { + header Authorization "Bearer {env.GLOBAL_API_KEY}" + } + + # Optionally, accept your GLOBAL_API_KEY via query parameters + @apiKeyAuthQuery { + query api_key={env.GLOBAL_API_KEY} + } + + handle @public { + reverse_proxy { + # Specify the local (or Tailscale) IPs & ports where the API service is running + to 100.64.64.20:4444 100.64.64.11:4444 10.13.37.30:4444 localhost:4444 + lb_policy first + health_uri /health + health_interval 10s + health_timeout 5s + health_status 2xx + header_up X-Forwarded-For {remote} + header_up X-Forwarded-Proto {scheme} + } + } + + handle @apiKeyAuthHeader { + reverse_proxy { + # Specify the local (or Tailscale) IPs & ports where the API service is running + to 100.64.64.20:4444 100.64.64.11:4444 10.13.37.30:4444 localhost:4444 + lb_policy first + health_uri /health + health_interval 10s + health_timeout 5s + health_status 2xx + } + } + + handle @apiKeyAuthQuery { + reverse_proxy { + # Specify the local (or 
Tailscale) IPs & ports where the API service is running + to 100.64.64.20:4444 100.64.64.11:4444 10.13.37.30:4444 localhost:4444 + lb_policy first + health_uri /health + health_interval 10s + health_timeout 5s + health_status 2xx + } + } + + handle { + respond "Unauthorized: Valid API key required" 401 + } + + # Assuming you use Cloudflare for DNS challenges and have configured a CLOUDFLARE_API_TOKEN environment variable in Caddy's context + tls { + dns cloudflare {env.CLOUDFLARE_API_TOKEN} + } + + log { + output file /var/log/caddy/sijapi.log { + roll_size 100mb + roll_keep 5 + roll_keep_for 720h + } + format json { + time_format "iso8601" + message_key "message" + } + } +} + +# Everything below here is ancillary to the primary API functionality +# If you have another domain you want to expose a particular endpoint on, try something like this -- e.g., here, https://sij.law/pgp as a short URL to share my public PGP key via. +sij.law { + reverse_proxy /pgp 100.64.64.20:4444 100.64.64.30:4444 100.64.64.11:4444 localhost:4444 { + lb_policy first + health_uri /health + health_interval 10s + health_timeout 5s + health_status 2xx + } + + # Because I maintain a separate service on this domain (a Ghost blog), I need fallback handling for everything besides `/pgp`. + reverse_proxy localhost:2368 + tls { + dns cloudflare {env.CLOUDFLARE_API_TOKEN} + } +} + +# Another special use case example: this provides handling for my URL shortener. +sij.ai { + + # Any three-character alphanumeric URI is construed as a shortened URL. 
+ @shorturl { + path_regexp ^/[a-zA-Z0-9]{3}$ + } + + # https://sij.ai/s points to the WebUI for my URL shortener + @shortener_ui { + path /s + } + + @apiKeyAuthHeader { + header Authorization "Bearer {env.GLOBAL_API_KEY}" + } + + @apiKeyAuthQuery { + query api_key={env.GLOBAL_API_KEY} + } + + @analytics { + path_regexp ^/analytics/[a-zA-Z0-9]{3}$ + } + + @pgp { + path /pgp + } + + handle @shortener_ui { + reverse_proxy 100.64.64.20:4444 100.64.64.30:4444 100.64.64.11:4444 localhost:4444 { + lb_policy first + health_uri /health + health_interval 10s + health_timeout 5s + health_status 2xx + } + } + + handle @shorturl { + reverse_proxy 100.64.64.20:4444 100.64.64.30:4444 100.64.64.11:4444 localhost:4444 { + lb_policy first + health_uri /health + health_interval 10s + health_timeout 5s + health_status 2xx + } + } + + handle @analytics { + reverse_proxy 100.64.64.20:4444 100.64.64.30:4444 100.64.64.11:4444 localhost:4444 { + lb_policy first + health_uri /health + health_interval 10s + health_timeout 5s + health_status 2xx + } + } + + # Handling for my public PGP key endpoint + handle @pgp { + reverse_proxy 100.64.64.20:4444 100.64.64.30:4444 100.64.64.11:4444 localhost:4444 { + lb_policy first + health_uri /health + health_interval 10s + health_timeout 5s + health_status 2xx + } + } + + # Base domain redirects to my Ghost blog + handle / { + redir https://sij.law permanent + } + + # All URIs that don't fit the patterns above redirect to the equivalent URI on my Ghost blog domain + handle /* { + redir https://sij.law{uri} permanent + } + + tls { + dns cloudflare {env.CLOUDFLARE_API_TOKEN} + } +} \ No newline at end of file diff --git a/Extras/Caddyfile/README.md b/Extras/Caddyfile/README.md new file mode 100644 index 0000000..256b8a8 --- /dev/null +++ b/Extras/Caddyfile/README.md @@ -0,0 +1 @@ +This is a sample Caddyfile for a load-balancing reverse-proxy setup with HTTPS, Cloudflare DNS challenge handling, API key handling (and specified endpoints exempt from key 
requirement), and a second domain with special handling for certain endpoints (e.g. /s for the URL shortener, /pgp for public PGP key) diff --git a/Extras/GPS.py Pythonista Script (location tracking)/GPS.py b/Extras/GPS.py Pythonista Script (location tracking)/GPS.py new file mode 100644 index 0000000..31a7e11 --- /dev/null +++ b/Extras/GPS.py Pythonista Script (location tracking)/GPS.py @@ -0,0 +1,63 @@ +import location +import time +import json +import os +import sys +from datetime import datetime, timezone + +def get_current_location(): + location.start_updates() + time.sleep(1) # Give it a moment to get an accurate fix + current_location = location.get_location() + location.stop_updates() + + elevation = current_location['altitude'] + latitude = current_location['latitude'] + longitude = current_location['longitude'] + current_time = datetime.now(timezone.utc) + timestamp = current_time.isoformat() + + return { + 'latitude': latitude, + 'longitude': longitude, + 'elevation': elevation, + 'datetime': timestamp + } + +def save_location_data(data, context, filename='location_log.json'): + if os.path.exists(filename): + with open(filename, 'r') as f: + existing_data = json.load(f) + else: + existing_data = [] + + data['context'] = context + existing_data.append(data) + + with open(filename, 'w') as f: + json.dump(existing_data, f, indent=4) + +if len(sys.argv) > 1: + try: + context = json.loads(sys.argv[1]) + context.setdefault('action', 'manual') + context.setdefault('device_type', 'Pythonista') + context.setdefault('device_model', None) + context.setdefault('device_name', None) + context.setdefault('device_os', None) + except json.JSONDecodeError: + print("Error: The provided argument is not a valid JSON.") + sys.exit(1) +else: + context = { + 'action': 'manual', + 'device_type': 'Pythonista', + 'device_model': None, + 'device_name': None, + 'device_os': None + } + +location_data = get_current_location() +save_location_data(location_data, context) 
+print(f"Location data: {location_data} with context '{context}' saved locally.") +time.sleep(5) diff --git a/Extras/GPS.py Pythonista Script (location tracking)/README.md b/Extras/GPS.py Pythonista Script (location tracking)/README.md new file mode 100644 index 0000000..aaeebae --- /dev/null +++ b/Extras/GPS.py Pythonista Script (location tracking)/README.md @@ -0,0 +1 @@ +These two companion scripts are designed for use with Pythonista on iOS devices. GPS.py records and saves your latitude, longitude, and elevation to a local file, and uploadGPS.py uploads it to your sijapi instance (assuming you've entered a valid URL and the GLOBAL_API_KEY you chose when configuring sijapi). diff --git a/Extras/GPS.py Pythonista Script (location tracking)/uploadGPS.py b/Extras/GPS.py Pythonista Script (location tracking)/uploadGPS.py new file mode 100644 index 0000000..e52f495 --- /dev/null +++ b/Extras/GPS.py Pythonista Script (location tracking)/uploadGPS.py @@ -0,0 +1 @@ +how \ No newline at end of file diff --git a/sijapi/__main__.py b/sijapi/__main__.py index accac93..1eea4ec 100755 --- a/sijapi/__main__.py +++ b/sijapi/__main__.py @@ -117,28 +117,30 @@ async def http_exception_handler(request: Request, exc: HTTPException): err(f"Request: {request.method} {request.url}") return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) + @app.middleware("http") async def handle_exception_middleware(request: Request, call_next): try: response = await call_next(request) - except RuntimeError as exc: - if str(exc) == "Response content longer than Content-Length": - # Update the Content-Length header to match the actual response content length - response.headers["Content-Length"] = str(len(response.body)) - else: - raise - return response + return response + except Exception as exc: + err(f"Unhandled exception in request: {request.method} {request.url}") + err(f"Exception: {str(exc)}") + err(f"Traceback: {traceback.format_exc()}") + return JSONResponse( + 
status_code=500, + content={"detail": "Internal Server Error"} + ) @app.post("/sync/pull") async def pull_changes(): + info(f"Received request to /sync/pull") try: await API.add_primary_keys_to_local_tables() await API.add_primary_keys_to_remote_tables() try: - source = await API.get_most_recent_source() - if source: # Pull changes from the source total_changes = await API.pull_changes(source) @@ -156,9 +158,12 @@ async def pull_changes(): }) except Exception as e: - err(f"Error during pull: {str(e)}") + err(f"Error in /sync/pull: {str(e)}") err(f"Traceback: {traceback.format_exc()}") raise HTTPException(status_code=500, detail=f"Error during pull: {str(e)}") + + finally: + info(f"Finished processing /sync/pull request") except Exception as e: err(f"Error while ensuring primary keys to tables: {str(e)}")