Auto-update: Thu Jun 27 09:46:17 PDT 2024

2024-06-27 09:46:17 -07:00 · 2024-06-27 09:46:17 -07:00 · 8f095e5e71
commit 8f095e5e71
parent d1f5c923ca
4 changed files with 175 additions and 161 deletions
--- a/sijapi/init.py
+++ b/sijapi/init.py
@ -72,7 +72,7 @@ DynamicTZ = TimezoneTracker(DB)
 ### Obsidian & notes
 ALLOWED_FILENAME_CHARS = r'[^\w \.-]'
-MAX_FILENAME_LENGTH = 255
+MAX_PATH_LENGTH = 254
 OBSIDIAN_VAULT_DIR = Path(os.getenv("OBSIDIAN_BASE_DIR") or HOME_DIR / "Nextcloud" / "notes")
 OBSIDIAN_JOURNAL_DIR = OBSIDIAN_VAULT_DIR / "journal"
 OBSIDIAN_RESOURCES_DIR = "obsidian/resources"
@ -80,6 +80,8 @@ OBSIDIAN_BANNER_DIR = f"{OBSIDIAN_RESOURCES_DIR}/banners"
 os.makedirs(Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_BANNER_DIR, exist_ok=True)
 OBSIDIAN_BANNER_SCENE = os.getenv("OBSIDIAN_BANNER_SCENE", "wallpaper")
 OBSIDIAN_CHROMADB_COLLECTION = os.getenv("OBSIDIAN_CHROMADB_COLLECTION", "obsidian")
 ARCHIVE_DIR = Path(os.getenv("ARCHIVE_DIR", OBSIDIAN_VAULT_DIR / "archive"))
 os.makedirs(ARCHIVE_DIR, exist_ok=True)
 DOC_DIR = DATA_DIR / "docs"
 os.makedirs(DOC_DIR, exist_ok=True)
--- a/sijapi/data/sd/workflows/wallpaper.json
+++ b/sijapi/data/sd/workflows/wallpaper.json
@ -51,23 +51,23 @@
    "inputs": {
      "batch_size": 1,
      "width": 1023,
-      "height": 1025,
+      "height": 1024,
      "resampling": "bicubic",
      "X": 0,
      "Y": 0,
      "Z": 0,
      "evolution": 0.1,
      "frame": 1,
-      "scale": 13.1,
+      "scale": 6.66,
      "octaves": 8,
-      "persistence": 6.2,
+      "persistence": 3,
-      "lacunarity": 5.38,
+      "lacunarity": 6.66,
-      "exponent": 4.5600000000000005,
+      "exponent": 1,
-      "brightness": -0.16,
+      "brightness": 0,
-      "contrast": -0.13,
+      "contrast": 0,
      "clamp_min": 0,
      "clamp_max": 1,
-      "seed": 474669046020372,
+      "seed": 300432080108380,
      "device": "cpu",
      "optional_vae": [
        "4",
@ -81,10 +81,10 @@
  },
  "13": {
    "inputs": {
-      "seed": 484066073734968,
+      "seed": 1125631171146107,
-      "steps": 8,
+      "steps": 10,
      "cfg": 1.8,
-      "sampler_name": "dpmpp_2m_sde",
+      "sampler_name": "dpmpp_2s_ancestral",
      "scheduler": "karras",
      "start_at_step": 0,
      "end_at_step": 10000,
@ -197,57 +197,6 @@
      "title": "CLIP Text Encode (Prompt)"
    }
  },
  "22": {
    "inputs": {
      "upscale_by": 2,
      "seed": 589846903558615,
      "steps": 20,
      "cfg": 1.6,
      "sampler_name": "heun",
      "scheduler": "sgm_uniform",
      "denoise": 0.21,
      "mode_type": "Linear",
      "tile_width": 512,
      "tile_height": 512,
      "mask_blur": 8,
      "tile_padding": 32,
      "seam_fix_mode": "Band Pass",
      "seam_fix_denoise": 1,
      "seam_fix_width": 64,
      "seam_fix_mask_blur": 8,
      "seam_fix_padding": 16,
      "force_uniform_tiles": true,
      "tiled_decode": true,
      "image": [
        "38",
        0
      ],
      "model": [
        "4",
        0
      ],
      "positive": [
        "6",
        0
      ],
      "negative": [
        "23",
        0
      ],
      "vae": [
        "4",
        2
      ],
      "upscale_model": [
        "24",
        0
      ]
    },
    "class_type": "UltimateSDUpscale",
    "_meta": {
      "title": "Ultimate SD Upscale"
    }
  },
  "23": {
    "inputs": {
      "conditioning": [
@ -276,7 +225,7 @@
        0
      ],
      "image": [
-        "22",
+        "39",
        0
      ]
    },
@ -313,21 +262,6 @@
      "title": "ImageBlur"
    }
  },
  "36": {
    "inputs": {
      "mode": "bicubic",
      "factor": 1.25,
      "align": "true",
      "samples": [
        "13",
        0
      ]
    },
    "class_type": "Latent Upscale by Factor (WAS)",
    "_meta": {
      "title": "Latent Upscale by Factor (WAS)"
    }
  },
  "38": {
    "inputs": {
      "samples": [
@ -343,5 +277,65 @@
    "_meta": {
      "title": "VAE Decode"
    }
  },
  "39": {
    "inputs": {
      "upscale_by": 2,
      "seed": 687912408861107,
      "steps": 20,
      "cfg": 1.9000000000000001,
      "sampler_name": "heun",
      "scheduler": "sgm_uniform",
      "denoise": 0.2,
      "mode_type": "Linear",
      "tile_width": 512,
      "tile_height": 512,
      "mask_blur": 8,
      "tile_padding": 32,
      "seam_fix_mode": "Band Pass",
      "seam_fix_denoise": 1,
      "seam_fix_width": 64,
      "seam_fix_mask_blur": 8,
      "seam_fix_padding": 16,
      "force_uniform_tiles": true,
      "tiled_decode": true,
      "image": [
        "38",
        0
      ],
      "model": [
        "4",
        0
      ],
      "positive": [
        "6",
        0
      ],
      "negative": [
        "23",
        0
      ],
      "vae": [
        "4",
        2
      ],
      "upscale_model": [
        "40",
        0
      ]
    },
    "class_type": "UltimateSDUpscale",
    "_meta": {
      "title": "Ultimate SD Upscale"
    }
  },
  "40": {
    "inputs": {
      "model_name": "RealESRGAN_x2.pth"
    },
    "class_type": "UpscaleModelLoader",
    "_meta": {
      "title": "Load Upscale Model"
    }
  }
 }
--- a/sijapi/routers/note.py
+++ b/sijapi/routers/note.py
@ -7,15 +7,19 @@ from io import BytesIO
 from pydantic import BaseModel
 import os, re
 import uuid
 import aiohttp
 import traceback
 import requests
 import mimetypes
 import shutil
 from bs4 import BeautifulSoup
 from markdownify import markdownify as md
 from typing import Optional, Union, Dict, List, Tuple
 from urllib.parse import urlparse
 from urllib3.util.retry import Retry
 from newspaper import Article
 import trafilatura
 from readability import Document
 from requests.adapters import HTTPAdapter
 import re
 import os
@ -23,10 +27,10 @@ from datetime import timedelta, datetime, time as dt_time, date as dt_date
 from fastapi import HTTPException, status
 from pathlib import Path
 from fastapi import APIRouter, Query, HTTPException
-from sijapi import L, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, BASE_URL, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, TZ
+from sijapi import L, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, ARCHIVE_DIR, BASE_URL, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, TZ
 from sijapi.routers import tts, llm, time, sd, locate, weather, asr, calendar
 from sijapi.routers.locate import Location
-from sijapi.utilities import assemble_journal_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, HOURLY_COLUMNS_MAPPING
+from sijapi.utilities import assemble_journal_path, assemble_archive_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, HOURLY_COLUMNS_MAPPING
 note = APIRouter()
@ -440,9 +444,9 @@ async def parse_article(url: str, source: Optional[str] = None):
    L.INFO(f"Parsed {np3k.title}")
-    title = np3k.title or traf.title
+    title = (np3k.title or traf.title) or url
    authors = np3k.authors or traf.author
-    authors = authors if isinstance(authors, List) else [authors]
+    authors = (authors if isinstance(authors, List) else [authors])
    date = np3k.publish_date or traf.date
    try:
        date = await locate.localize_datetime(date)
@ -469,6 +473,33 @@ async def parse_article(url: str, source: Optional[str] = None):
    }
 async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]:
    """
    Reduce an HTML page to its main content and return it as markdown.
    When `source` is truthy it is used as the HTML and `url` is ignored;
    otherwise the page at `url` is fetched with aiohttp. If neither is
    supplied an error is logged and None is returned.
    Headings in the returned markdown use the ATX ("#") style.
    """
    # Guard clause: nothing to convert.
    if not source and not url:
        L.ERR(f"Unable to convert nothing to markdown.")
        return None
    if source:
        raw_html = source
    else:
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                raw_html = await response.text()
    # readability's Document.summary() yields the HTML of the main content
    main_html = Document(raw_html).summary()
    # Re-parse so leftover script/style elements can be removed before conversion
    soup = BeautifulSoup(main_html, 'html.parser')
    for tag in soup(['script', 'style']):
        tag.decompose()
    return md(str(soup), heading_style="ATX")
 async def process_archive(
    background_tasks: BackgroundTasks,
@ -476,59 +507,32 @@ async def process_archive(
    title: Optional[str] = None,
    encoding: str = 'utf-8',
    source: Optional[str] = None,
-):
+) -> Path:
    timestamp = datetime.now().strftime('%b %d, %Y at %H:%M')
    readable_title = title if title else f"{url} - {timestamp}"
-    parsed_content = await parse_article(url, source)
+    content = await html_to_markdown(url, source)
-    if parsed_content is None:
+    if content is None:
-        return {"error": "Failed to retrieve content"}
+        raise HTTPException(status_code=400, detail="Failed to convert content to markdown")
    content = parsed_content["content"]
-    readable_title = sanitize_filename(title if title else parsed_content.get("title", "Untitled"))
+    markdown_path, relative_path = assemble_archive_path(readable_title, ".md")
    if not readable_title:
        readable_title = timestamp
-    markdown_path = OBSIDIAN_VAULT_DIR / "archive"
+    markdown_content = f"---\n"
    markdown_content += f"title: {readable_title}\n"
    markdown_content += f"added: {timestamp}\n"
    markdown_content += f"---\n\n"
    markdown_content += f"# {readable_title}\n\n"
    markdown_content += content
    try:
-        frontmatter = f"""---
+        markdown_path.parent.mkdir(parents=True, exist_ok=True)
-title: {readable_title}
+        with open(markdown_path, 'w', encoding=encoding) as md_file:
-author: {parsed_content.get('author', 'Unknown')}
+            md_file.write(markdown_content)
-published: {parsed_content.get('date_published', 'Unknown')}
+        L.INFO(f"Successfully saved to {markdown_path}")
-added: {timestamp}
+        return markdown_path
 excerpt: {parsed_content.get('excerpt', '')}
 ---
 """
        body = f"# {readable_title}\n\n"
        try:
            authors = parsed_content.get('author', '')
            authors_in_brackets = [f"[[{author.strip()}]]" for author in authors.split(",")]
            authors_string = ", ".join(authors_in_brackets)
            body += f"by {authors_string} in [{parsed_content.get('domain', urlparse(url).netloc.replace('www.', ''))}]({parsed_content.get('url', url)}).\n\n"
            body += content
            markdown_content = frontmatter + body
        except Exception as e:
            L.ERR(f"Failed to combine elements of article markdown.")
        try:
            with open(markdown_path, 'w', encoding=encoding) as md_file:
                md_file.write(markdown_content)
            L.INFO(f"Successfully saved to {markdown_path}")
            add_to_daily_note
            return markdown_path
        except Exception as e:
            L.ERR(f"Failed to write markdown file")
            raise HTTPException(status_code=500, detail=str(e))
    except Exception as e:
-        L.ERR(f"Failed to clip {url}: {str(e)}")
+        L.ERR(f"Failed to write markdown file: {str(e)}")
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=f"Failed to write markdown file: {str(e)}")
 def download_file(url, folder):
    os.makedirs(folder, exist_ok=True)
@ -569,7 +573,6 @@ def copy_file(local_path, folder):
    return filename
 async def save_file(file: UploadFile, folder: Path) -> Path:
    file_path = folder / f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{file.filename}"
    with open(file_path, 'wb') as f:
--- a/sijapi/utilities.py
+++ b/sijapi/utilities.py
@ -25,7 +25,7 @@ import asyncpg
 from sshtunnel import SSHTunnelForwarder
 from fastapi import Depends, HTTPException, Request, UploadFile
 from fastapi.security.api_key import APIKeyHeader
-from sijapi import L, GLOBAL_API_KEY, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_FILENAME_LENGTH
+from sijapi import L, GLOBAL_API_KEY, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR
 api_key_header = APIKeyHeader(name="Authorization")
@ -38,6 +38,35 @@ def validate_api_key(request: Request, api_key: str = Depends(api_key_header)):
            raise HTTPException(status_code=401, detail="Invalid or missing API key")
 def assemble_archive_path(filename: str, extension: str = ".md", date_time: datetime = None, subdir: str = None) -> Tuple[Path, Path]:
    """
    Build the dated archive path for a file.
    The result is ARCHIVE_DIR / year / month / day / "<day_short> <HHMMSS> <sanitized base><ext>".
    Args:
        filename: Desired file name. An extension embedded in it takes
            precedence over `extension`.
        extension: Fallback extension used when `filename` carries none.
        date_time: Moment used for the dated folders and the timestamp
            prefix. Defaults to the time of the call.
        subdir: Not used by this function.
    Returns:
        (absolute_path, relative_path), where relative_path is relative to ARCHIVE_DIR.
    """
    # A `datetime.now()` default in the signature is evaluated only once, when
    # the module is imported, so every later call would reuse that stale
    # timestamp. Resolve the default at call time instead.
    if date_time is None:
        date_time = datetime.now()
    year = date_time.strftime(YEAR_FMT)
    month = date_time.strftime(MONTH_FMT)
    day = date_time.strftime(DAY_FMT)
    day_short = date_time.strftime(DAY_SHORT_FMT)
    timestamp = date_time.strftime("%H%M%S")
    # Ensure the extension is preserved
    base_name, ext = os.path.splitext(filename)
    extension = ext if ext else extension
    # Initial sanitization of the base name only (extension handled separately)
    sanitized_base = sanitize_filename(base_name, '')
    def build_paths(base: str) -> Tuple[Path, Path]:
        # Assemble (absolute, relative) for a given base name.
        relative = Path(year) / month / day / f"{day_short} {timestamp} {base}{extension}"
        return ARCHIVE_DIR / relative, relative
    absolute_path, relative_path = build_paths(sanitized_base)
    # Ensure the total path length doesn't exceed MAX_PATH_LENGTH by trimming
    # the base name (never the date prefix or the extension). Trimming in one
    # step, and never below an empty base, avoids the endless loop that
    # char-by-char truncation hits when the directory part alone is too long.
    overflow = len(str(absolute_path)) - MAX_PATH_LENGTH
    if overflow > 0:
        keep = max(len(sanitized_base) - overflow, 0)
        absolute_path, relative_path = build_paths(sanitized_base[:keep])
    return absolute_path, relative_path
 def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str = None, extension: str = None, no_timestamp: bool = False) -> Tuple[Path, Path]:
    '''
    Obsidian helper. Takes a datetime and optional subdirectory name, filename, and extension.
@ -51,32 +80,22 @@ def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str
    timestamp = date_time.strftime("%H%M%S")
    relative_path = Path("journal") / year / month / day
    if not subdir and not filename and not extension:
        # standard daily note handler, where only the date_time was specified:
        relative_path = relative_path / f"{day}.md"
    else:
        if subdir:
            # datestamped subdirectory handler
            relative_path = relative_path / f"{day_short} {subdir}"
        if filename:
            filename = sanitize_filename(filename)
            filename = f"{day_short} {filename}" if no_timestamp else f"{day_short} {timestamp} {filename}"
            if extension:
                extension = extension if extension.startswith(".") else f".{extension}"
                filename = f"{filename}{extension}" if not filename.endswith(extension) else filename
            else:
-                if has_valid_extension(filename, [".md", ".m4a", ".wav", ".aiff", ".flac", ".mp3", ".mp4", ".pdf", ".js", ".json", ".yaml", ".py"]):
+                extension = validate_extension(filename, [".md", ".m4a", ".wav", ".aiff", ".flac", ".mp3", ".mp4", ".pdf", ".js", ".json", ".yaml", ".py"]) or ".md"
                    L.DEBUG(f"Provided filename has a valid extension, so we use that.")
                else:
                    filename = f"{filename}.md"
                    L.DEBUG(f"We are forcing the file to be a .md")
            filename = sanitize_filename(filename)
            filename = f"{day_short} {filename}" if no_timestamp else f"{day_short} {timestamp} {filename}"
            filename = f"{filename}{extension}" if not filename.endswith(extension) else filename
            relative_path = relative_path / filename
        else:
@ -84,20 +103,16 @@ def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str
            return None, None
    absolute_path = OBSIDIAN_VAULT_DIR / relative_path 
    os.makedirs(absolute_path.parent, exist_ok=True)
    return absolute_path, relative_path
-def has_valid_extension(filename, valid_extensions=None):
+def validate_extension(filename, valid_extensions=None):
    if valid_extensions is None:
-        # Check if there's any extension
+        return os.path.splitext(filename)
        return bool(os.path.splitext(filename)[1])
    else:
-        # Check if the extension is in the list of valid extensions
+        extension = os.path.splitext(filename)[-1].lower()
-        return os.path.splitext(filename)[1].lower() in valid_extensions
+        return extension if extension in valid_extensions else None
 def prefix_lines(text: str, prefix: str = '> ') -> str:
    lines = text.split('\n')
@ -138,7 +153,7 @@ def get_extension(file):
-def sanitize_filename(text, max_length=MAX_FILENAME_LENGTH):
+def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LENGTH):
    """Sanitize a string to be used as a safe filename while protecting the file extension."""
    L.DEBUG(f"Filename before sanitization: {text}")
@ -149,7 +164,7 @@ def sanitize_filename(text, max_length=MAX_FILENAME_LENGTH):
    max_base_length = max_length - len(extension)
    if len(base_name) > max_base_length:
-        base_name = base_name[:max_base_length].rstrip()
+        base_name = base_name[:max_base_length - 5].rstrip()
    final_filename = base_name + extension
    L.DEBUG(f"Filename after sanitization: {final_filename}")