From 1a987996f62533032d21087d5bb164619164d1b9 Mon Sep 17 00:00:00 2001
From: sanj <67624670+iodrift@users.noreply.github.com>
Date: Tue, 6 Aug 2024 19:55:48 -0700
Subject: [PATCH] Auto-update: Tue Aug  6 19:55:48 PDT 2024

---
 .../README.md                                 |   3 +
 .../archivist.js                              | 119 ++++++++++
 Extras/Caddyfile/Caddyfile.example            | 218 ++++++++++++++++++
 Extras/Caddyfile/README.md                    |   1 +
 .../GPS.py                                    |  63 +++++
 .../README.md                                 |   1 +
 .../uploadGPS.py                              |   1 +
 sijapi/__main__.py                            |  25 +-
 8 files changed, 421 insertions(+), 10 deletions(-)
 create mode 100644 Extras/Archivist.js UserScript (web archiving)/README.md
 create mode 100644 Extras/Archivist.js UserScript (web archiving)/archivist.js
 create mode 100644 Extras/Caddyfile/Caddyfile.example
 create mode 100644 Extras/Caddyfile/README.md
 create mode 100644 Extras/GPS.py Pythonista Script (location tracking)/GPS.py
 create mode 100644 Extras/GPS.py Pythonista Script (location tracking)/README.md
 create mode 100644 Extras/GPS.py Pythonista Script (location tracking)/uploadGPS.py

diff --git a/Extras/Archivist.js UserScript (web archiving)/README.md b/Extras/Archivist.js UserScript (web archiving)/README.md
new file mode 100644
index 0000000..4f64498
--- /dev/null
+++ b/Extras/Archivist.js UserScript (web archiving)/README.md	
@@ -0,0 +1,3 @@
+This is designed to work with UserScripts and similar browser extensions. Fill in the domain/URL where your sijapi instance is exposed (http://localhost:4444 is fine for the same device, but consider using a reverse proxy to extend to your mobile devices).
+
+And fill in your GLOBAL_API_KEY that you chose when configuring sijapi.
diff --git a/Extras/Archivist.js UserScript (web archiving)/archivist.js b/Extras/Archivist.js UserScript (web archiving)/archivist.js
new file mode 100644
index 0000000..0e91e5b
--- /dev/null
+++ b/Extras/Archivist.js UserScript (web archiving)/archivist.js	
@@ -0,0 +1,119 @@
+// ==UserScript==
+// @name         Archivist
+// @version      0.5
+// @description  archivist userscript posts to sij.ai/archive
+// @author       sij
+// @match        *://*/*
+// @grant        GM_xmlhttpRequest
+// ==/UserScript==
+
+(function () {
+  "use strict";
+
+  // Return true when `url` looks like an ad, tracker, or other unwanted resource.
+  // Each pattern is tested against the whole URL string, not just the hostname.
+  function isUnwantedURL(url) {
+    const unwantedPatterns = [
+      /doubleclick\.net/,
+      /googlesyndication\.com/,
+      /adservice\./,
+      /analytics\./,
+      /tracker\./,
+      /pixel\./,
+      // \b stops "ad."/"ads." from matching inside words such as "download." or "read."
+      /\bads?\d*\./,
+      /\/ads\//,
+      /url=http/,
+      /safeframe/,
+      /adsystem/,
+      /adserver/,
+      /adnetwork/,
+      /sync\./,
+      /beacon\./,
+      /optimizely/,
+      /outbrain/,
+      /widgets\./,
+      /cdn\./,
+      /pixel\?/,
+      /recaptcha/,
+      /accounts\.google\.com\/gsi/,
+      /imasdk\.googleapis\.com/,
+      /amplify-imp/,
+      /zemanta/,
+      /monitor\.html/,
+      /widgetMonitor/,
+      /nanoWidget/,
+      /client_storage/,
+    ];
+    return unwantedPatterns.some((pattern) => pattern.test(url));
+  }
+
+  // Archive the current page: POST its title, URL, referrer, width, encoding and HTML to sijapi.
+  function archivePage() {
+    // Fill in your sijapi URL and GLOBAL_API_KEY; never commit a real key to a repository.
+    const SIJAPI_URL = "http://localhost:4444";
+    const GLOBAL_API_KEY = "YOUR_GLOBAL_API_KEY";
+    var currentUrl = window.location.href;
+    if (isUnwantedURL(currentUrl)) {
+      console.log("Skipping unwanted URL:", currentUrl);
+      return;
+    }
+
+    var data = new URLSearchParams({
+      title: document.title,
+      url: currentUrl,
+      referrer: document.referrer || "",
+      width: window.innerWidth ? window.innerWidth.toString() : "",
+      encoding: document.characterSet,
+      source: document.documentElement.outerHTML,
+    });
+
+    GM_xmlhttpRequest({
+      method: "POST",
+      url: SIJAPI_URL + "/archive", // key goes in the header only, so it stays out of URL logs
+      headers: {
+        "Content-Type": "application/x-www-form-urlencoded",
+        Authorization: "Bearer " + GLOBAL_API_KEY, // standard "Bearer <key>" form
+      },
+      data: data.toString(),
+      onload: function (response) {
+        console.log("Archive request sent for:", currentUrl, "status:", response.status);
+      },
+      onerror: (error) => console.error("Error sending archive request:", error),
+    });
+  }
+
+  // Wrap `func` so it runs only once `wait` ms have passed without another call;
+  // the arguments of the most recent call are the ones passed through.
+  function debounce(func, wait) {
+    let pendingTimer;
+    return function executedFunction(...args) {
+      clearTimeout(pendingTimer);
+      pendingTimer = setTimeout(() => {
+        clearTimeout(pendingTimer);
+        func(...args);
+      }, wait);
+    };
+  }
+
+  // archivePage wrapped by debounce() with a 2000 ms wait
+  const debouncedArchivePage = debounce(archivePage, 2000);
+
+  // Re-archive on history navigation (back/forward)
+  window.addEventListener("popstate", debouncedArchivePage);
+
+  // Watch link clicks in the capture phase (third argument `true`),
+  // ignoring clicks outside an <a> and links that point at unwanted URLs
+  document.addEventListener(
+    "click",
+    (event) => {
+      const anchor = event.target.closest("a");
+      if (!anchor || isUnwantedURL(anchor.href)) return;
+      setTimeout(debouncedArchivePage, 1000); // Delay to allow page to load
+    },
+    true
+  );
+
+  // Archive once, 5 s after the script starts
+  setTimeout(archivePage, 5000);
+})();
diff --git a/Extras/Caddyfile/Caddyfile.example b/Extras/Caddyfile/Caddyfile.example
new file mode 100644
index 0000000..4feb2a2
--- /dev/null
+++ b/Extras/Caddyfile/Caddyfile.example
@@ -0,0 +1,218 @@
+{
+	log {
+		# Specify path and log level for Caddy logs
+		output file /var/log/caddy/logfile.log
+		level INFO
+	}
+
+	# replace `localhost` with an externally accessible IP address, e.g. a local LAN address or Tailscale IP. Take care not to use a publicly accessible IP address, as the Caddy API is not separately protected by API keys!
+	admin localhost:2019
+
+	servers {
+		metrics
+	}
+
+	# Replace with your email address for SSL certificate registration
+	email info@example.com
+}
+
+# This is a highly permissive CORS config (any origin allowed). Dial it back as your use case allows.
+(cors) {
+	@cors_preflight method OPTIONS
+	header {
+		Access-Control-Allow-Origin "*"
+		Access-Control-Expose-Headers "Authorization"
+		# Access-Control-Allow-Credentials is omitted: browsers reject it together with a wildcard origin.
+		Access-Control-Allow-Headers "Authorization, Content-Type"
+	}
+
+	handle @cors_preflight {
+		header {
+			Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE"
+			Access-Control-Max-Age "3600"
+		}
+		respond "" 204
+	}
+}
+
+# Replace with the subdomain you want to expose your API over
+api.example.com {
+	import cors
+
+	# Specify which endpoints do not require an API key
+	@public {
+		path /img/* /oauth /oauth/* /MS365 /MS365/* /ip /health /health* /health/* /id /identity
+	}
+
+	# Accept your GLOBAL_API_KEY (specified via environment variable in Caddy's context) via `Authorization: Bearer` header
+	@apiKeyAuthHeader {
+		header Authorization "Bearer {env.GLOBAL_API_KEY}"
+	}
+
+	# Optionally, accept your GLOBAL_API_KEY via query parameters
+	@apiKeyAuthQuery {
+		query api_key={env.GLOBAL_API_KEY}
+	}
+
+	handle @public {
+		reverse_proxy {
+			# Specify the local (or Tailscale) IPs & ports where the API service is running
+			to  100.64.64.20:4444 100.64.64.11:4444 10.13.37.30:4444 localhost:4444
+			lb_policy first
+			health_uri /health
+			health_interval 10s
+			health_timeout 5s
+			health_status 2xx
+			header_up X-Forwarded-For {remote}
+			header_up X-Forwarded-Proto {scheme}
+		}
+	}
+
+	handle @apiKeyAuthHeader {
+		reverse_proxy {
+			# Specify the local (or Tailscale) IPs & ports where the API service is running
+			to  100.64.64.20:4444 100.64.64.11:4444 10.13.37.30:4444 localhost:4444
+			lb_policy first
+			health_uri /health
+			health_interval 10s
+			health_timeout 5s
+			health_status 2xx
+		}
+	}
+
+	handle @apiKeyAuthQuery {
+		reverse_proxy {
+			# Specify the local (or Tailscale) IPs & ports where the API service is running
+			to  100.64.64.20:4444 100.64.64.11:4444 10.13.37.30:4444 localhost:4444
+			lb_policy first
+			health_uri /health
+			health_interval 10s
+			health_timeout 5s
+			health_status 2xx
+		}
+	}
+
+	handle {
+		respond "Unauthorized: Valid API key required" 401
+	}
+
+	# Assuming you use Cloudflare for DNS challenges and have configured a CLOUDFLARE_API_TOKEN environmental variable in Caddy's context
+	tls {
+		dns cloudflare {env.CLOUDFLARE_API_TOKEN}
+	}
+
+	log {
+		output file /var/log/caddy/sijapi.log {
+			roll_size 100mb
+			roll_keep 5
+			roll_keep_for 720h
+		}
+		format json {
+			time_format "iso8601"
+			message_key "message"
+		}
+	}
+}
+
+# Everything below here is ancillary to the primary API functionality
+# If you have another domain you want to expose a particular endpoint on, try something like this -- e.g., here, https://sij.law/pgp as a short URL to share my public PGP key via.
+sij.law {
+	reverse_proxy /pgp 100.64.64.20:4444 100.64.64.30:4444 100.64.64.11:4444 localhost:4444 {
+		lb_policy first
+		health_uri /health
+		health_interval 10s
+		health_timeout 5s
+		health_status 2xx
+	}
+
+	# Because I maintain a separate service on this domain (a Ghost blog), I need fallback handling for everything besides `/pgp`.
+	reverse_proxy localhost:2368
+	tls {
+		dns cloudflare {env.CLOUDFLARE_API_TOKEN}
+	}
+}
+
+# Another special use case example: this provides handling for my URL shortener. 
+sij.ai {
+	
+	# Any three-character alphanumeric URI is construed as a shortened URL.
+	@shorturl {
+		path_regexp ^/[a-zA-Z0-9]{3}$
+	}
+
+	# https://sij.ai/s points to the WebUI for my URL shortener
+	@shortener_ui {
+		path /s
+	}
+
+	@apiKeyAuthHeader {
+        header Authorization "Bearer {env.GLOBAL_API_KEY}"
+    }
+
+    @apiKeyAuthQuery {
+        query api_key={env.GLOBAL_API_KEY}
+    }
+
+	@analytics {
+		path_regexp ^/analytics/[a-zA-Z0-9]{3}$
+	}
+
+	@pgp {
+		path /pgp
+	}
+
+	handle @shortener_ui {
+		reverse_proxy 100.64.64.20:4444 100.64.64.30:4444 100.64.64.11:4444 localhost:4444 {
+			lb_policy first
+			health_uri /health
+			health_interval 10s
+			health_timeout 5s
+			health_status 2xx
+		}
+	}
+
+	handle @shorturl {
+		reverse_proxy 100.64.64.20:4444 100.64.64.30:4444 100.64.64.11:4444 localhost:4444 {
+			lb_policy first
+			health_uri /health
+			health_interval 10s
+			health_timeout 5s
+			health_status 2xx
+		}
+	}
+
+	handle @analytics {
+		reverse_proxy 100.64.64.20:4444 100.64.64.30:4444 100.64.64.11:4444 localhost:4444 {
+			lb_policy first
+			health_uri /health
+			health_interval 10s
+			health_timeout 5s
+			health_status 2xx
+		}
+	}
+
+	# Handling for my public PGP key endpoint
+	handle @pgp {
+		reverse_proxy 100.64.64.20:4444 100.64.64.30:4444 100.64.64.11:4444 localhost:4444 {
+			lb_policy first
+			health_uri /health
+			health_interval 10s
+			health_timeout 5s
+			health_status 2xx
+		}
+	}
+	
+	# Base domain redirects to my Ghost blog
+	handle / {
+		redir https://sij.law permanent
+	}
+
+	# All URIs that don't fit the patterns above redirect to the equivalent URI on my Ghost blog domain
+	handle /* {
+		redir https://sij.law{uri} permanent
+	}
+
+	tls {
+		dns cloudflare {env.CLOUDFLARE_API_TOKEN}
+	}
+}
\ No newline at end of file
diff --git a/Extras/Caddyfile/README.md b/Extras/Caddyfile/README.md
new file mode 100644
index 0000000..256b8a8
--- /dev/null
+++ b/Extras/Caddyfile/README.md
@@ -0,0 +1 @@
+This is a sample Caddyfile for a load-balancing reverse-proxy setup with HTTPS, Cloudflare DNS challenge handling, API key handling (and specified endpoints exempt from key requirement), and a second domain with special handling for certain endpoints (e.g. /s for the URL shortener, /pgp for public PGP key)
diff --git a/Extras/GPS.py Pythonista Script (location tracking)/GPS.py b/Extras/GPS.py Pythonista Script (location tracking)/GPS.py
new file mode 100644
index 0000000..31a7e11
--- /dev/null
+++ b/Extras/GPS.py Pythonista Script (location tracking)/GPS.py	
@@ -0,0 +1,63 @@
+import location
+import time
+import json
+import os
+import sys
+from datetime import datetime, timezone
+
+def get_current_location():
+    """Return latitude, longitude, elevation and a UTC ISO-8601 'datetime'.
+
+    Raises RuntimeError when Pythonista reports no location."""
+    location.start_updates()
+    try:
+        time.sleep(1)  # Give it a moment to get an accurate fix
+        current_location = location.get_location()
+    finally:
+        location.stop_updates()  # always stop updates, even if the read fails
+    if not current_location:
+        raise RuntimeError("No location fix available")
+    return {
+        'latitude': current_location['latitude'],
+        'longitude': current_location['longitude'],
+        'elevation': current_location['altitude'],
+        'datetime': datetime.now(timezone.utc).isoformat(),
+    }
+
+def save_location_data(data, context, filename='location_log.json'):
+    """Append `data`, tagged with `context`, to the JSON list stored in `filename`."""
+    try:
+        with open(filename, 'r', encoding='utf-8') as f:
+            existing_data = json.load(f)
+    except FileNotFoundError:
+        existing_data = []  # first run: start a new log
+
+    # Build a new record so the caller's dict is not modified as a side effect.
+    existing_data.append({**data, 'context': context})
+    with open(filename, 'w', encoding='utf-8') as f:
+        json.dump(existing_data, f, indent=4)
+
+# Fallback values for context fields the caller does not supply.
+DEFAULT_CONTEXT = {
+    'action': 'manual',
+    'device_type': 'Pythonista',
+    'device_model': None,
+    'device_name': None,
+    'device_os': None,
+}
+context = dict(DEFAULT_CONTEXT)
+if len(sys.argv) > 1:
+    try:
+        supplied = json.loads(sys.argv[1])  # optional context as a JSON object
+    except json.JSONDecodeError:
+        print("Error: The provided argument is not a valid JSON.")
+        sys.exit(1)
+    if not isinstance(supplied, dict):  # e.g. a bare string or list
+        print("Error: The provided argument must be a JSON object.")
+        sys.exit(1)
+    context.update(supplied)
+
+location_data = get_current_location()
+save_location_data(location_data, context)
+print(f"Location data: {location_data} with context '{context}' saved locally.")
+time.sleep(5)
diff --git a/Extras/GPS.py Pythonista Script (location tracking)/README.md b/Extras/GPS.py Pythonista Script (location tracking)/README.md
new file mode 100644
index 0000000..aaeebae
--- /dev/null
+++ b/Extras/GPS.py Pythonista Script (location tracking)/README.md	
@@ -0,0 +1 @@
+These two companion scripts are designed for use with Pythonista on iOS devices. GPS.py records your latitude, longitude, and elevation and saves them to a local file, and uploadGPS.py uploads that file to your sijapi instance (assuming you've entered a valid URL and an API key matching the GLOBAL_API_KEY you chose when configuring sijapi).
diff --git a/Extras/GPS.py Pythonista Script (location tracking)/uploadGPS.py b/Extras/GPS.py Pythonista Script (location tracking)/uploadGPS.py
new file mode 100644
index 0000000..e52f495
--- /dev/null
+++ b/Extras/GPS.py Pythonista Script (location tracking)/uploadGPS.py	
@@ -0,0 +1 @@
+how
\ No newline at end of file
diff --git a/sijapi/__main__.py b/sijapi/__main__.py
index accac93..1eea4ec 100755
--- a/sijapi/__main__.py
+++ b/sijapi/__main__.py
@@ -117,28 +117,30 @@ async def http_exception_handler(request: Request, exc: HTTPException):
     err(f"Request: {request.method} {request.url}")
     return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
 
+
 @app.middleware("http")
 async def handle_exception_middleware(request: Request, call_next):
     try:
         response = await call_next(request)
-    except RuntimeError as exc:
-        if str(exc) == "Response content longer than Content-Length":
-            # Update the Content-Length header to match the actual response content length
-            response.headers["Content-Length"] = str(len(response.body))
-        else:
-            raise
-    return response
+        return response
+    except Exception as exc:
+        err(f"Unhandled exception in request: {request.method} {request.url}")
+        err(f"Exception: {str(exc)}")
+        err(f"Traceback: {traceback.format_exc()}")
+        return JSONResponse(
+            status_code=500,
+            content={"detail": "Internal Server Error"}
+        )
 
 
 @app.post("/sync/pull")
 async def pull_changes():
+    info(f"Received request to /sync/pull")
     try:
         await API.add_primary_keys_to_local_tables()
         await API.add_primary_keys_to_remote_tables()
         try:
-
             source = await API.get_most_recent_source()
-
             if source:
                 # Pull changes from the source
                 total_changes = await API.pull_changes(source)
@@ -156,9 +158,12 @@ async def pull_changes():
                 })
 
         except Exception as e:
-            err(f"Error during pull: {str(e)}")
+            err(f"Error in /sync/pull: {str(e)}")
             err(f"Traceback: {traceback.format_exc()}")
             raise HTTPException(status_code=500, detail=f"Error during pull: {str(e)}")
+            
+        finally:
+            info(f"Finished processing /sync/pull request")
 
     except Exception as e:
             err(f"Error while ensuring primary keys to tables: {str(e)}")