Auto-update: Sun Jun 30 11:08:14 PDT 2024
parent c9dc619a5a, commit c742336b62
5 changed files with 369 additions and 114 deletions

sijapi
@@ -23,21 +23,16 @@ os.makedirs(LOGS_DIR, exist_ok=True)
 load_dotenv(ENV_PATH)
 
 ### API essentials
-API_CONFIG_PATH = CONFIG_DIR / "api.yaml"
-SECRETS_PATH = CONFIG_DIR / "secrets.yaml"
-API = APIConfig.load(API_CONFIG_PATH, SECRETS_PATH)
-DIR_CONFIG_PATH = CONFIG_DIR / "dirs.yaml"
-L.DEBUG(f"Loading DIR configuration from: {DIR_CONFIG_PATH}")
-DIR = Configuration.load(DIR_CONFIG_PATH)
-L.DEBUG(f"Loaded DIR configuration: {DIR.__dict__}")
-
-DB = Database.from_env()
-
+API = APIConfig.load('api', 'secrets')
+Dir = Configuration.load('dirs')
 HOST = f"{API.BIND}:{API.PORT}"
 LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
 SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
 MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count())
+DB = Database.from_env()
 
+News = Configuration.load('news', 'secrets')
+SD = Configuration.load('sd', 'secrets')
 
 ### Directories & general paths
 ROUTER_DIR = BASE_DIR / "routers"
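Note on the hunk above: the old module-level `*_CONFIG_PATH` constants disappear because `APIConfig.load('api', 'secrets')` and `Configuration.load('dirs')` now accept bare names and resolve them through the `_resolve_path` helpers added further down in this commit. A minimal sketch of that resolution rule, with a hypothetical checkout path standing in for `Path(__file__).parent.parent`:

```python
from pathlib import Path
from typing import Union

def resolve_path(path: Union[str, Path], default_dir: str = 'config') -> Path:
    # Mirrors the new _resolve_path: bare names map into the bundled config dir.
    # base_path is hardcoded here for illustration only; the real method derives
    # it from Path(__file__).parent.parent.
    base_path = Path('/srv/sijapi-repo')  # hypothetical checkout location
    path = Path(path)
    if not path.suffix:                    # bare name: 'api' -> sijapi/config/api.yaml
        return base_path / 'sijapi' / default_dir / f"{path.name}.yaml"
    if not path.is_absolute():             # relative paths stay repo-relative
        return base_path / path
    return path                            # absolute paths pass through untouched

assert resolve_path('api') == Path('/srv/sijapi-repo/sijapi/config/api.yaml')
assert resolve_path('dirs') == Path('/srv/sijapi-repo/sijapi/config/dirs.yaml')
```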
@@ -66,7 +61,7 @@ GEO = Geocoder(NAMED_LOCATIONS, TZ_CACHE)
 ### Obsidian & notes
 ALLOWED_FILENAME_CHARS = r'[^\w \.-]'
 MAX_PATH_LENGTH = 254
-OBSIDIAN_VAULT_DIR = Path(os.getenv("OBSIDIAN_BASE_DIR") or Path(DIR.HOME) / "Nextcloud" / "notes")
+OBSIDIAN_VAULT_DIR = Path(os.getenv("OBSIDIAN_BASE_DIR") or Path(Dir.HOME) / "Nextcloud" / "notes")
 OBSIDIAN_JOURNAL_DIR = OBSIDIAN_VAULT_DIR / "journal"
 OBSIDIAN_RESOURCES_DIR = "obsidian/resources"
 OBSIDIAN_BANNER_DIR = f"{OBSIDIAN_RESOURCES_DIR}/banners"
@@ -118,7 +113,7 @@ SD_CONFIG_PATH = CONFIG_DIR / 'sd.yaml'
 ### ASR
 ASR_DIR = DATA_DIR / "asr"
 os.makedirs(ASR_DIR, exist_ok=True)
-WHISPER_CPP_DIR = Path(DIR.HOME) / str(os.getenv("WHISPER_CPP_DIR"))
+WHISPER_CPP_DIR = Path(Dir.HOME) / str(os.getenv("WHISPER_CPP_DIR"))
 WHISPER_CPP_MODELS = os.getenv('WHISPER_CPP_MODELS', 'NULL,VOID').split(',')
 
 ### TTS
@@ -135,6 +130,7 @@ TTS_SEGMENTS_DIR = TTS_DIR / 'segments'
 os.makedirs(TTS_SEGMENTS_DIR, exist_ok=True)
 ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
 
+
 ### Calendar & email account
 MS365_TOGGLE = True if os.getenv("MS365_TOGGLE") == "True" else False
 ICAL_TOGGLE = True if os.getenv("ICAL_TOGGLE") == "True" else False
@@ -22,6 +22,119 @@ from timezonefinder import TimezoneFinder
 T = TypeVar('T', bound='Configuration')
 
 
+
+import os
+from pathlib import Path
+from typing import Union, Optional, Any, Dict, List
+import yaml
+import re
+from pydantic import BaseModel, create_model
+from dotenv import load_dotenv
+
+class Configuration(BaseModel):
+    HOME: Path = Path.home()
+    _dir_config: Optional['Configuration'] = None
+
+    @classmethod
+    def load(cls, yaml_path: Union[str, Path], secrets_path: Optional[Union[str, Path]] = None, dir_config: Optional['Configuration'] = None) -> 'Configuration':
+        yaml_path = cls._resolve_path(yaml_path, 'config')
+        if secrets_path:
+            secrets_path = cls._resolve_path(secrets_path, 'config')
+
+        try:
+            with yaml_path.open('r') as file:
+                config_data = yaml.safe_load(file)
+
+            print(f"Loaded configuration data from {yaml_path}")
+
+            if secrets_path:
+                with secrets_path.open('r') as file:
+                    secrets_data = yaml.safe_load(file)
+                print(f"Loaded secrets data from {secrets_path}")
+                config_data.update(secrets_data)
+
+            # Ensure HOME is set
+            if config_data.get('HOME') is None:
+                config_data['HOME'] = str(Path.home())
+                print(f"HOME was None in config, set to default: {config_data['HOME']}")
+
+            load_dotenv()
+
+            instance = cls.create_dynamic_model(**config_data)
+            instance._dir_config = dir_config or instance
+
+            resolved_data = instance.resolve_placeholders(config_data)
+            instance = cls.create_dynamic_model(**resolved_data)
+            instance._dir_config = dir_config or instance
+
+            return instance
+        except Exception as e:
+            print(f"Error loading configuration: {str(e)}")
+            raise
+
+    @classmethod
+    def _resolve_path(cls, path: Union[str, Path], default_dir: str) -> Path:
+        base_path = Path(__file__).parent.parent  # This will be two levels up from this file
+        path = Path(path)
+        if not path.suffix:
+            path = base_path / 'sijapi' / default_dir / f"{path.name}.yaml"
+        elif not path.is_absolute():
+            path = base_path / path
+        return path
+
+    def resolve_placeholders(self, data: Any) -> Any:
+        if isinstance(data, dict):
+            return {k: self.resolve_placeholders(v) for k, v in data.items()}
+        elif isinstance(data, list):
+            return [self.resolve_placeholders(v) for v in data]
+        elif isinstance(data, str):
+            return self.resolve_string_placeholders(data)
+        else:
+            return data
+
+    def resolve_string_placeholders(self, value: str) -> Any:
+        pattern = r'\{\{\s*([^}]+)\s*\}\}'
+        matches = re.findall(pattern, value)
+
+        for match in matches:
+            parts = match.split('.')
+            if len(parts) == 1:  # Internal reference
+                replacement = getattr(self._dir_config, parts[0], str(Path.home() / parts[0].lower()))
+            elif len(parts) == 2 and parts[0] == 'Dir':
+                replacement = getattr(self._dir_config, parts[1], str(Path.home() / parts[1].lower()))
+            elif len(parts) == 2 and parts[0] == 'ENV':
+                replacement = os.getenv(parts[1], '')
+            else:
+                replacement = value  # Keep original if not recognized
+
+            value = value.replace('{{' + match + '}}', str(replacement))
+
+        # Convert to Path if it looks like a file path
+        if isinstance(value, str) and (value.startswith(('/', '~')) or (':' in value and value[1] == ':')):
+            return Path(value).expanduser()
+        return value
+
+    @classmethod
+    def create_dynamic_model(cls, **data):
+        for key, value in data.items():
+            if isinstance(value, dict):
+                data[key] = cls.create_dynamic_model(**value)
+            elif isinstance(value, list) and all(isinstance(item, dict) for item in value):
+                data[key] = [cls.create_dynamic_model(**item) for item in value]
+
+        DynamicModel = create_model(
+            f'Dynamic{cls.__name__}',
+            __base__=cls,
+            **{k: (Any, v) for k, v in data.items()}
+        )
+        return DynamicModel(**data)
+
+    class Config:
+        extra = "allow"
+        arbitrary_types_allowed = True
+
+
+
 class APIConfig(BaseModel):
     HOST: str
     PORT: int
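For reference, `resolve_string_placeholders` above recognizes three placeholder shapes inside YAML values. A self-contained sketch of the same logic (a plain dict stands in for the pydantic dir config; this is an illustration, not an import from sijapi):

```python
import os
import re
from pathlib import Path

PATTERN = r'\{\{\s*([^}]+)\s*\}\}'

def expand(value: str, dir_config: dict) -> str:
    # Same three cases as resolve_string_placeholders:
    #   {{NAME}}     -> attribute of the dir config
    #   {{Dir.NAME}} -> same lookup, explicit namespace
    #   {{ENV.NAME}} -> environment variable
    for match in re.findall(PATTERN, value):
        parts = match.split('.')
        if len(parts) == 1:
            replacement = dir_config.get(parts[0], str(Path.home() / parts[0].lower()))
        elif len(parts) == 2 and parts[0] == 'Dir':
            replacement = dir_config.get(parts[1], str(Path.home() / parts[1].lower()))
        elif len(parts) == 2 and parts[0] == 'ENV':
            replacement = os.getenv(parts[1], '')
        else:
            replacement = value  # unrecognized namespace: leave as-is
        # Literal reassembly: '{{Dir.HOME}}' only hits if written without inner
        # spaces, since the captured text is spliced back verbatim.
        value = value.replace('{{' + match + '}}', str(replacement))
    return value

print(expand('{{Dir.HOME}}/Nextcloud/notes', {'HOME': '/home/sij'}))
# -> /home/sij/Nextcloud/notes
```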
@@ -34,7 +147,10 @@ class APIConfig(BaseModel):
     KEYS: List[str]
 
     @classmethod
-    def load(cls, config_path: Path, secrets_path: Path):
+    def load(cls, config_path: Union[str, Path], secrets_path: Union[str, Path]):
+        config_path = cls._resolve_path(config_path, 'config')
+        secrets_path = cls._resolve_path(secrets_path, 'config')
+
         # Load main configuration
         with open(config_path, 'r') as file:
             config_data = yaml.safe_load(file)
@@ -90,6 +206,16 @@ class APIConfig(BaseModel):
 
         return cls(**config_data)
 
+    @classmethod
+    def _resolve_path(cls, path: Union[str, Path], default_dir: str) -> Path:
+        base_path = Path(__file__).parent.parent  # This will be two levels up from this file
+        path = Path(path)
+        if not path.suffix:
+            path = base_path / "sijapi" / default_dir / f"{path.name}.yaml"
+        elif not path.is_absolute():
+            path = base_path / path
+        return path
+
     @classmethod
     def resolve_placeholders(cls, config_data: Dict[str, Any]) -> Dict[str, Any]:
         def resolve_value(value):
@@ -127,87 +253,6 @@ class APIConfig(BaseModel):
         return [module for module, is_active in self.MODULES.__dict__.items() if is_active]
 
 
-class Configuration(BaseModel):
-    HOME: Path = Path.home()
-    _dir_config: Optional['Configuration'] = None
-
-    @classmethod
-    def load(cls, yaml_path: Union[str, Path], dir_config: Optional['Configuration'] = None) -> 'Configuration':
-        yaml_path = Path(yaml_path)
-        try:
-            with yaml_path.open('r') as file:
-                config_data = yaml.safe_load(file)
-
-            print(f"Loaded configuration data: {config_data}")
-
-            # Ensure HOME is set
-            if config_data.get('HOME') is None:
-                config_data['HOME'] = str(Path.home())
-                print(f"HOME was None in config, set to default: {config_data['HOME']}")
-
-            load_dotenv()
-
-            instance = cls.create_dynamic_model(**config_data)
-            instance._dir_config = dir_config or instance
-
-            resolved_data = instance.resolve_placeholders(config_data)
-            for key, value in resolved_data.items():
-                setattr(instance, key, value)
-
-            return instance
-        except Exception as e:
-            print(f"Error loading configuration from {yaml_path}: {str(e)}")
-            raise
-
-    def resolve_placeholders(self, data: Any) -> Any:
-        if isinstance(data, dict):
-            return {k: self.resolve_placeholders(v) for k, v in data.items()}
-        elif isinstance(data, list):
-            return [self.resolve_placeholders(v) for v in data]
-        elif isinstance(data, str):
-            return self.resolve_string_placeholders(data)
-        else:
-            return data
-
-    def resolve_string_placeholders(self, value: str) -> Any:
-        pattern = r'\{\{\s*([^}]+)\s*\}\}'
-        matches = re.findall(pattern, value)
-
-        for match in matches:
-            parts = match.split('.')
-            if len(parts) == 1:  # Internal reference
-                replacement = getattr(self._dir_config, parts[0], str(Path.home() / parts[0].lower()))
-            elif len(parts) == 2 and parts[0] == 'DIR':
-                replacement = getattr(self._dir_config, parts[1], str(Path.home() / parts[1].lower()))
-            elif len(parts) == 2 and parts[0] == 'ENV':
-                replacement = os.getenv(parts[1], '')
-            else:
-                replacement = value  # Keep original if not recognized
-
-            value = value.replace('{{' + match + '}}', str(replacement))
-
-        # Convert to Path if it looks like a file path
-        if isinstance(value, str) and (value.startswith(('/', '~')) or (':' in value and value[1] == ':')):
-            return Path(value).expanduser()
-        return value
-
-    @classmethod
-    def create_dynamic_model(cls, **data):
-        for key, value in data.items():
-            if isinstance(value, dict):
-                data[key] = cls.create_dynamic_model(**value)
-
-        DynamicModel = create_model(
-            f'Dynamic{cls.__name__}',
-            __base__=cls,
-            **{k: (type(v), v) for k, v in data.items()}
-        )
-        return DynamicModel(**data)
-
-    class Config:
-        extra = "allow"
-        arbitrary_types_allowed = True
-
-
 class Location(BaseModel):
     latitude: float
sijapi/config/news.yaml-example (Normal file, 32 additions)
@@ -0,0 +1,32 @@
+sites:
+  - name: The Intercept
+    url: https://theintercept.com
+    max_articles: 5
+    days_back: 14
+    summarize: True
+    tts: off
+    tts_voice: Kiel
+    podcast: True
+  - name: The New York Times
+    url: https://www.nytimes.com
+    max_articles: 10
+    days_back: 7
+    summarize: True
+    tts: off
+    tts_voice: Luna
+    podcast: True
+  - name: The Guardian
+    url: https://theguardian.com
+    max_articles: 10
+    days_back: 7
+    summarize: True
+    tts: off
+    tts_voice: Attenborough
+    podcast: True
+llm:
+  model: llama3
+tts:
+  model: elevenlabs-v2
+  voice: Luna
+podcast: True
+
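Since `Configuration.load('news', 'secrets')` pushes this YAML through `create_dynamic_model`, each mapping surfaces as attribute access (`News.sites`, `site.max_articles`), which is how the router below consumes it. A rough stand-in using `SimpleNamespace` in place of the dynamic pydantic model (the file path assumes the example has been copied to `news.yaml`, per the `-example` naming convention):

```python
from types import SimpleNamespace
import yaml

def to_ns(obj):
    # Stand-in for Configuration.create_dynamic_model: nested dicts/lists
    # become attribute access instead of key lookups.
    if isinstance(obj, dict):
        return SimpleNamespace(**{k: to_ns(v) for k, v in obj.items()})
    if isinstance(obj, list):
        return [to_ns(v) for v in obj]
    return obj

with open('sijapi/config/news.yaml') as f:
    News = to_ns(yaml.safe_load(f))

for site in News.sites:
    # NB: PyYAML reads the bare `off` above as boolean False, not the string "off".
    print(site.name, site.url, site.max_articles, site.days_back, site.tts)
```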
@@ -1,32 +1,214 @@
-from fastapi import APIRouter, BackgroundTasks, File, UploadFile, Form, HTTPException, Response, Query, Path as FastAPIPath
-from fastapi.responses import JSONResponse
-from zoneinfo import ZoneInfo
-from io import BytesIO
-from pydantic import BaseModel
-from bs4 import BeautifulSoup
-import requests
-from markdownify import markdownify as md
-import os
-import mimetypes
-from datetime import datetime as dt_datetime
-import shutil
-import uuid
-import aiohttp
-from pathlib import Path
+import asyncio
+import shutil
+import requests
+import mimetypes
+from io import BytesIO
+from bs4 import BeautifulSoup
+from zoneinfo import ZoneInfo
+from urllib.parse import urlparse
+from urllib3.util.retry import Retry
+from datetime import datetime as dt_datetime, timedelta
+from typing import Optional
+
+import aiohttp
+import aiofiles
+import newspaper
+from newspaper import Article
+import trafilatura
+from readability import Document
+from markdownify import markdownify as md
+from requests.adapters import HTTPAdapter
-from sijapi import API, L, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, GEO
+from urllib3.util.retry import Retry
+
+from fastapi import APIRouter, BackgroundTasks, File, UploadFile, Form, HTTPException, Response, Query, Path as FastAPIPath
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+
+from pathlib import Path
+
+from sijapi.classes import Configuration
+from sijapi import API, L, Dir, News, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, GEO
+from sijapi.utilities import sanitize_filename, assemble_journal_path, assemble_archive_path
+from sijapi.routers import llm, tts, asr, loc
+
+from newspaper import Article
+
+
 news = APIRouter()
 
+async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "summary", voice: str = DEFAULT_11L_VOICE):
+    try:
+        url = article.url
+        source = trafilatura.fetch_url(url)
+
+        if source is None:
+            # Fallback to newspaper3k if trafilatura fails
+            article.download()
+            article.parse()
+            traf = None
+        else:
+            traf = trafilatura.extract_metadata(filecontent=source, default_url=url)
+            article.download()
+            article.parse()
+
+        # Update article properties, preferring trafilatura data when available
+        article.title = traf.title if traf and traf.title else article.title or url
+        article.authors = traf.author if traf and traf.author else article.authors or []
+        article.publish_date = traf.date if traf and traf.date else article.publish_date
+        try:
+            article.publish_date = await loc.dt(article.publish_date, "UTC")
+        except:
+            L.DEBUG(f"Failed to localize {article.publish_date}")
+            article.publish_date = await loc.dt(dt_datetime.now(), "UTC")
+        article.meta_description = traf.description if traf and traf.description else article.meta_description
+        article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) if source else article.text
+        article.top_image = traf.image if traf and traf.image else article.top_image
+        article.source_url = traf.sitename if traf and traf.sitename else urlparse(url).netloc.replace('www.', '').title()
+        article.meta_keywords = traf.categories or traf.tags if traf else article.meta_keywords or []
+        article.meta_keywords = article.meta_keywords if isinstance(article.meta_keywords, list) else [article.meta_keywords]
+
+        if not is_article_within_date_range(article, earliest_date):
+            return False
+
+        timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
+        readable_title = sanitize_filename(article.title or timestamp)
+        markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md")
+
+        summary = await llm.summarize_text(article.text, "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
+        summary = summary.replace('\n', ' ')  # Remove line breaks
+
+        if tts_mode == "full" or tts_mode == "content":
+            tts_text = article.text
+        elif tts_mode == "summary" or tts_mode == "excerpt":
+            tts_text = summary
+        else:
+            tts_text = None
+
+        banner_markdown = ''
+        try:
+            banner_url = article.top_image
+            if banner_url:
+                banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR))
+                if banner_image:
+                    banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
+        except Exception as e:
+            L.ERR(f"No image found in article")
+
+        authors = ', '.join(['[[{}]]'.format(author.strip()) for author in article.authors if author.strip()])
+        if not authors:
+            authors = '[[Unknown Author]]'
+
+        frontmatter = f"""---
+title: {readable_title}
+authors: {authors}
+published: {article.publish_date}
+added: {timestamp}
+banner: "{banner_markdown}"
+tags:
+"""
+        frontmatter += '\n'.join(f" - {tag}" for tag in article.meta_keywords)
+        frontmatter += '\n---\n'
+
+        body = f"# {readable_title}\n\n"
+        if tts_text:
+            audio_filename = f"{article.publish_date.strftime('%Y-%m-%d')} {readable_title}"
+            try:
+                audio_path = await tts.generate_speech(
+                    bg_tasks=bg_tasks,
+                    text=tts_text,
+                    voice=voice,
+                    model="eleven_turbo_v2",
+                    podcast=True,
+                    title=audio_filename,
+                    output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR
+                )
+                if isinstance(audio_path, Path):
+                    audio_ext = audio_path.suffix
+                    obsidian_link = f"![[{audio_path.name}]]"
+                    body += f"{obsidian_link}\n\n"
+                else:
+                    L.WARN(f"Unexpected audio_path type: {type(audio_path)}. Value: {audio_path}")
+            except Exception as e:
+                L.ERR(f"Failed to generate TTS for {audio_filename}. Error: {str(e)}")
+                L.ERR(f"TTS error details - voice: {voice}, model: eleven_turbo_v2, podcast: True")
+                L.ERR(f"Output directory: {Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR}")
+
+        body += f"by {authors} in {article.source_url}\n\n"
+        body += f"> [!summary]+\n"
+        body += f"> {summary}\n\n"
+        body += article.text
+
+        markdown_content = frontmatter + body
+
+        with open(markdown_filename, 'w') as md_file:
+            md_file.write(markdown_content)
+
+        L.INFO(f"Successfully saved to {markdown_filename}")
+        add_to_daily_note(relative_path)
+        print(f"Saved article: {relative_path}")
+        return True
+
+    except Exception as e:
+        L.ERR(f"Error processing article from {article.url}: {str(e)}")
+        return False
+
+# You'll need to update your is_article_within_date_range function:
+def is_article_within_date_range(article, earliest_date):
+    return article.publish_date is not None and article.publish_date.date() >= earliest_date
+
+async def process_news_site(site, bg_tasks: BackgroundTasks):
+    print(f"Downloading articles from {site.name}...")
+
+    earliest_date = dt_datetime.now().date() - timedelta(days=site.days_back)
+
+    try:
+        news_source = newspaper.build(site.url, memoize_articles=False)
+
+        tasks = []
+        for article in news_source.articles[:site.max_articles]:
+            task = asyncio.create_task(download_and_save_article(
+                article,
+                site.name,
+                earliest_date,
+                bg_tasks,
+                tts_mode=site.tts if hasattr(site, 'tts') else "off",
+                voice=site.tts if hasattr(site, 'tts') else DEFAULT_11L_VOICE
+            ))
+            tasks.append(task)
+
+        results = await asyncio.gather(*tasks)
+        articles_downloaded = sum(results)
+
+        print(f"Downloaded {articles_downloaded} articles from {site.name}")
+    except Exception as e:
+        print(f"Error processing {site.name}: {str(e)}")
+
+# Update your news_refresh_endpoint function:
+@news.get("/news/refresh")
+async def news_refresh_endpoint(bg_tasks: BackgroundTasks):
+    tasks = [process_news_site(site, bg_tasks) for site in News.sites]
+    await asyncio.gather(*tasks)
+    return "OK"
+
+
+async def generate_path(article, site_name):
+    publish_date = await loc.dt(article.publish_date, 'UTC') if article.publish_date else await loc.dt(dt_datetime.now(), 'UTC')
+    title_slug = "".join(c if c.isalnum() else "_" for c in article.title)
+    filename = f"{site_name} - {title_slug[:50]}.md"
+    absolute_path, relative_path = assemble_journal_path(publish_date, 'Articles', filename, extension='.md', no_timestamp=True)
+    return absolute_path, relative_path
+
+async def save_article_to_file(content, output_path):
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    async with aiofiles.open(output_path, 'w', encoding='utf-8') as file:
+        await file.write(content)
+
+
 
 ### CLIPPER ###
 @news.post("/clip")
 async def clip_post(
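With the router wired as above, a single GET fans out one `process_news_site` task per entry in `News.sites` via `asyncio.gather`. A hypothetical smoke test, assuming a local bind (host and port are illustrative, not taken from this diff):

```python
import requests

# /news/refresh kicks off the per-site fan-out and returns "OK" when all
# sites have been processed.
resp = requests.get("http://127.0.0.1:4444/news/refresh")
print(resp.status_code, resp.text)
```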
@@ -143,7 +143,7 @@ async def generate_speech(
         # raise HTTPException(status_code=400, detail="Invalid model specified")
 
         if podcast == True:
-            podcast_path = PODCAST_DIR / audio_file_path.name
+            podcast_path = Path(PODCAST_DIR) / audio_file_path.name
             L.DEBUG(f"Podcast path: {podcast_path}")
             shutil.copy(str(audio_file_path), str(podcast_path))
             bg_tasks.add_task(os.remove, str(audio_file_path))
@@ -152,7 +152,7 @@ async def generate_speech(
         return str(audio_file_path)
 
     except Exception as e:
-        L.ERROR(f"Failed to generate speech: {str(e)}")
+        L.ERR(f"Failed to generate speech: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}")
 
 
@@ -331,7 +331,7 @@ async def local_tts(
 
     # Export the combined audio in a separate thread
     if podcast:
-        podcast_file_path = PODCAST_DIR / file_path.name
+        podcast_file_path = Path(PODCAST_DIR) / file_path.name
         await asyncio.to_thread(combined_audio.export, podcast_file_path, format="wav")
 
     await asyncio.to_thread(combined_audio.export, file_path, format="wav")
@@ -425,7 +425,7 @@ def copy_to_podcast_dir(file_path):
     file_name = Path(file_path).name
 
     # Construct the destination path in the PODCAST_DIR
-    destination_path = PODCAST_DIR / file_name
+    destination_path = Path(PODCAST_DIR) / file_name
 
     # Copy the file to the PODCAST_DIR
     shutil.copy(file_path, destination_path)
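All three `Path(PODCAST_DIR)` wraps in this file guard the same failure: if `PODCAST_DIR` reaches these call sites as a plain string (as an env-var lookup would return), the `/` join operator is not defined on it. A minimal reproduction of the difference:

```python
from pathlib import Path

PODCAST_DIR = "/tmp/podcasts"  # a str, as os.getenv would produce

try:
    PODCAST_DIR / "episode.wav"           # str / str: no such operator
except TypeError as e:
    print(f"raises TypeError: {e}")

print(Path(PODCAST_DIR) / "episode.wav")  # Path / str joins: /tmp/podcasts/episode.wav
```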