From 61cb3a1dbae10e4fa68ef52e8662d8deefd162da Mon Sep 17 00:00:00 2001 From: sanj <67624670+iodrift@users.noreply.github.com> Date: Wed, 28 Aug 2024 10:20:34 -0700 Subject: [PATCH] Cleanup --- sijapi/helpers/{ => article}/article.py | 0 sijapi/helpers/{ => caplss}/CaPLSS.py | 0 .../CaPLSS_downloader_and_importer.py | 0 .../{ => caplss}/Townships_progress.json | 0 sijapi/helpers/{ => caplss}/plss.py | 0 sijapi/helpers/{ => db}/db.py | 0 sijapi/helpers/{ => db}/db_get_schema.py | 0 sijapi/helpers/{ => db}/db_repl.py | 0 sijapi/helpers/{ => db}/db_replicator.py | 0 sijapi/helpers/{ => db}/db_uuid_migrate.py | 0 sijapi/helpers/{ => db}/migrate_db_to_uuid.py | 0 .../migrate_query_tracking_to_uuid.sh | 0 sijapi/helpers/{ => db}/repair_weather_db.py | 0 sijapi/helpers/{ => db}/repl.py | 0 sijapi/helpers/{ => db}/repl.sh | 0 sijapi/helpers/{ => db}/replicator.py | 0 sijapi/helpers/{ => db}/schema_info.yaml | 0 ...levenlabs_history_ids_20240630_131617.json | 104 ------------------ .../helpers/{ => email}/log_prior_emails.py | 0 sijapi/helpers/fromvm/{ => db}/db.py | 0 .../{ => fromvm/db}/db_connection_test.py | 0 .../helpers/fromvm/{ => db}/db_get_schema.py | 0 .../fromvm/{ => db}/db_uuid_migrate.py | 0 sijapi/helpers/fromvm/db_connection_test.py | 66 ----------- .../helpers/{ => thp}/CalFire_THP_scraper.py | 0 sijapi/helpers/{ => thp}/thp.py | 0 sijapi/helpers/upscaler.py | 34 ------ sijapi/routers/news.py | 66 ++++++++--- 28 files changed, 49 insertions(+), 221 deletions(-) rename sijapi/helpers/{ => article}/article.py (100%) rename sijapi/helpers/{ => caplss}/CaPLSS.py (100%) rename sijapi/helpers/{ => caplss}/CaPLSS_downloader_and_importer.py (100%) rename sijapi/helpers/{ => caplss}/Townships_progress.json (100%) rename sijapi/helpers/{ => caplss}/plss.py (100%) rename sijapi/helpers/{ => db}/db.py (100%) rename sijapi/helpers/{ => db}/db_get_schema.py (100%) rename sijapi/helpers/{ => db}/db_repl.py (100%) rename sijapi/helpers/{ => db}/db_replicator.py (100%) rename sijapi/helpers/{ => db}/db_uuid_migrate.py (100%) rename sijapi/helpers/{ => db}/migrate_db_to_uuid.py (100%) rename sijapi/helpers/{ => db}/migrate_query_tracking_to_uuid.sh (100%) rename sijapi/helpers/{ => db}/repair_weather_db.py (100%) rename sijapi/helpers/{ => db}/repl.py (100%) rename sijapi/helpers/{ => db}/repl.sh (100%) rename sijapi/helpers/{ => db}/replicator.py (100%) rename sijapi/helpers/{ => db}/schema_info.yaml (100%) delete mode 100644 sijapi/helpers/elevenlabs_history_ids_20240630_131617.json rename sijapi/helpers/{ => email}/log_prior_emails.py (100%) rename sijapi/helpers/fromvm/{ => db}/db.py (100%) rename sijapi/helpers/{ => fromvm/db}/db_connection_test.py (100%) rename sijapi/helpers/fromvm/{ => db}/db_get_schema.py (100%) rename sijapi/helpers/fromvm/{ => db}/db_uuid_migrate.py (100%) delete mode 100644 sijapi/helpers/fromvm/db_connection_test.py rename sijapi/helpers/{ => thp}/CalFire_THP_scraper.py (100%) rename sijapi/helpers/{ => thp}/thp.py (100%) delete mode 100644 sijapi/helpers/upscaler.py diff --git a/sijapi/helpers/article.py b/sijapi/helpers/article/article.py similarity index 100% rename from sijapi/helpers/article.py rename to sijapi/helpers/article/article.py diff --git a/sijapi/helpers/CaPLSS.py b/sijapi/helpers/caplss/CaPLSS.py similarity index 100% rename from sijapi/helpers/CaPLSS.py rename to sijapi/helpers/caplss/CaPLSS.py diff --git a/sijapi/helpers/CaPLSS_downloader_and_importer.py b/sijapi/helpers/caplss/CaPLSS_downloader_and_importer.py similarity index 100% rename from sijapi/helpers/CaPLSS_downloader_and_importer.py rename to sijapi/helpers/caplss/CaPLSS_downloader_and_importer.py diff --git a/sijapi/helpers/Townships_progress.json b/sijapi/helpers/caplss/Townships_progress.json similarity index 100% rename from sijapi/helpers/Townships_progress.json rename to sijapi/helpers/caplss/Townships_progress.json diff --git a/sijapi/helpers/plss.py b/sijapi/helpers/caplss/plss.py similarity index 100% rename from sijapi/helpers/plss.py rename to sijapi/helpers/caplss/plss.py diff --git a/sijapi/helpers/db.py b/sijapi/helpers/db/db.py similarity index 100% rename from sijapi/helpers/db.py rename to sijapi/helpers/db/db.py diff --git a/sijapi/helpers/db_get_schema.py b/sijapi/helpers/db/db_get_schema.py similarity index 100% rename from sijapi/helpers/db_get_schema.py rename to sijapi/helpers/db/db_get_schema.py diff --git a/sijapi/helpers/db_repl.py b/sijapi/helpers/db/db_repl.py similarity index 100% rename from sijapi/helpers/db_repl.py rename to sijapi/helpers/db/db_repl.py diff --git a/sijapi/helpers/db_replicator.py b/sijapi/helpers/db/db_replicator.py similarity index 100% rename from sijapi/helpers/db_replicator.py rename to sijapi/helpers/db/db_replicator.py diff --git a/sijapi/helpers/db_uuid_migrate.py b/sijapi/helpers/db/db_uuid_migrate.py similarity index 100% rename from sijapi/helpers/db_uuid_migrate.py rename to sijapi/helpers/db/db_uuid_migrate.py diff --git a/sijapi/helpers/migrate_db_to_uuid.py b/sijapi/helpers/db/migrate_db_to_uuid.py similarity index 100% rename from sijapi/helpers/migrate_db_to_uuid.py rename to sijapi/helpers/db/migrate_db_to_uuid.py diff --git a/sijapi/helpers/migrate_query_tracking_to_uuid.sh b/sijapi/helpers/db/migrate_query_tracking_to_uuid.sh similarity index 100% rename from sijapi/helpers/migrate_query_tracking_to_uuid.sh rename to sijapi/helpers/db/migrate_query_tracking_to_uuid.sh diff --git a/sijapi/helpers/repair_weather_db.py b/sijapi/helpers/db/repair_weather_db.py similarity index 100% rename from sijapi/helpers/repair_weather_db.py rename to sijapi/helpers/db/repair_weather_db.py diff --git a/sijapi/helpers/repl.py b/sijapi/helpers/db/repl.py similarity index 100% rename from sijapi/helpers/repl.py rename to sijapi/helpers/db/repl.py diff --git a/sijapi/helpers/repl.sh b/sijapi/helpers/db/repl.sh similarity index 100% rename from sijapi/helpers/repl.sh rename to sijapi/helpers/db/repl.sh diff --git a/sijapi/helpers/replicator.py b/sijapi/helpers/db/replicator.py similarity index 100% rename from sijapi/helpers/replicator.py rename to sijapi/helpers/db/replicator.py diff --git a/sijapi/helpers/schema_info.yaml b/sijapi/helpers/db/schema_info.yaml similarity index 100% rename from sijapi/helpers/schema_info.yaml rename to sijapi/helpers/db/schema_info.yaml diff --git a/sijapi/helpers/elevenlabs_history_ids_20240630_131617.json b/sijapi/helpers/elevenlabs_history_ids_20240630_131617.json deleted file mode 100644 index d10f7da..0000000 --- a/sijapi/helpers/elevenlabs_history_ids_20240630_131617.json +++ /dev/null @@ -1,104 +0,0 @@ -{ - "history_item_ids": [ - "ncRYNd0Xef4LiUE74VjP", - "13pQLDAPYGIATwW1ySL5", - "dhsQNAYTWpcwo1X6rixf", - "V7wUip1NJuWAUw26sePF", - "mOYMa5lcI7wRHddIQTSa", - "mP97iOpA4oG7pwUBthq4", - "WTU5nsX6qZCYxLyoT5hq", - "15DPGnBgjr74KT3TMbK4", - "aCyBS1zoaweVjUoPf2TF", - "J8SUMQqZPtoy3Cgdhi3J", - "qKHaaJHfqh2je60Wmadb", - "2PaugQJ8c4rY44JGlaO5", - "TwzxcmYjo6XNebbMabcd", - "xdEK7rYq9UofOlkr565b", - "wik4jYd97aGMLgttTjC9", - "7oXn2yH7gdyhi6sEoWKd", - "jv8aZFiVe8gPMrAOBcNT", - "B2BctCDkCtLDxEMMBu9z", - "4KFO77NHDruNQvXIykwp", - "d033NizZaNZPc45fvxCO", - "yBKxOxfzsjpZYOFzoIM7", - "oEihKwMLWgvvoTLGx4yF", - "Q3guBm4hGml0KPAWKl7t", - "jaojY1gSafQmqshR48oT", - "yqGDMfcceaoceFEEurqa", - "oLdnyUp7plGrUMRVQ8Cf", - "FZAGCGosYEGMf8GCRFaA", - "TrWnXRdGkiH0K9kgwFiS", - "th16OEbg3u0XHslT9A33", - "856BAsn6dnzF7HeqGPfK", - "KjLoAfDXVBqR9s39T25j", - "uHQQJMMOfOxPAhEYQXLl", - "HO8WCIhkkI7AxwkU5MC6", - "9nxdesHWTRLCOd6YgWe9", - "tmx5tlIQ7hdSTgJt16P2", - "M9JN0YcBuCF6LhnqKN66", - "M9xkP4ecn0LIi7mQOfU6", - "CNtJgh52Ykh9ZqEppZeH", - "lgobcoiqmtWfbXkhEwbE", - "nr9jxnsE4DnwmTwCaHqC", - "Rnzo03tcyBqGPdmHemCb", - "X3YVGp7yf9GLgZ7WOuSU", - "wL3bkqxR9xqeFTvkJpSI", - "wNx3XDgFLTjVbMyGrIAO", - "rb0jj1ywBetmdvve5qIL", - "WdNnqvNswXeh6JFoaRSS", - "WT2ViyerKpodYmHDHhCw", - "OvhIRehXNwx7xMJHuTd7", - "EQb1iZtsADxJ0GxLJzEK", - "WXVfBJYoYGB7S61VyETD", - "q0q3Di1YJKF07dOhoa7E", - "a2XBIUPa68UiiKlzwFnG", - "YBuD7KsUpz8jxc5ItZcF", - "KdoucRVCVQGRVQ8Di9Ih", - "CkmDny98GEdfGuj2kaAx", - "R0R2p8luRZL7wwPtDilw", - "awvztgQnuaquK0dTpIuH", - "3ZPN0nJo8UQZYhFhoIOK", - "RJJeTkjYIgdv1ZoXXAax", - "ppxUNzWHAQafsM6OvEUE", - "f2VBm7yE7qmnjdS9CbYz", - "SZIMwz2T5ZAhTxTDBFol", - "YjC91PRgnQbAcdPhnWqU", - "fDTV7n8f6QK5yCwLkBwg", - "KbPpWUuiLPADj9H3OlvG", - "DIuqVoAg7lLxpvFBip84", - "pEwFAKMLGWUMHqfljJSq", - "9wwl7UbsgeKqrk8kNZin", - "2uLvjJgcZDiY9dqB8JlP", - "U5f1qZQM08t2YzJqEmxK", - "gnwn7QIhrCXRAGNddZ1H", - "g5nGEIHirFzKstdrGI1h", - "CQWH5dGSeS38VC4X4yg7", - "C5YGjhJPrTkVOpxIOHdj", - "YLbtnf1pSb9Ra7wgFHiF", - "qNLgNSvMr4VSoisKS9qj", - "Bq2ALvQVsj9L2wMpUvYO", - "gi0yTXLZLMhUKeKcalWc", - "3JQN9UbCsqj9ggi5sCkq", - "oPflJoA9kqBzjlmWY6zL", - "0kUZFgtZdqgdUBXFsXs9", - "aFTi7XdjR8W52ThmFpgc", - "pgIfjcy2UvKggfqJ1aNx", - "r0VguLaqnxTL9jza9H4y", - "444ehr4RtqgU1xjhhTLo", - "pEuzoznVDaQRBhIA9VTy", - "T9hdW9eJkEqDmOsSUoeY", - "wJjHbGzoWiKKOIGmf82T", - "kij4uMmkUlsSDu2zSH1k", - "oWt5rns196JsKIYPyrBS", - "SJ1m9mSOGOLIhkMgA8kq", - "kAaqe0ATrYtkifmZLOE5", - "O2Pvz7CP5rfyNvzFSDmy", - "w1rb8qN5nohVUovC0XAx", - "njFs4I4F7rtd9I6fEn6x", - "miFrp9GBm3MsHO03Z4eY", - "5DJywiPsfeVP9hFdqRhd", - "mUephoXhk5QdWrOfr9Xr", - "tDDiW3Yp0BptZ2wBv21A", - "YpX06liXWHquUVYFlKYa" - ] -} \ No newline at end of file diff --git a/sijapi/helpers/log_prior_emails.py b/sijapi/helpers/email/log_prior_emails.py similarity index 100% rename from sijapi/helpers/log_prior_emails.py rename to sijapi/helpers/email/log_prior_emails.py diff --git a/sijapi/helpers/fromvm/db.py b/sijapi/helpers/fromvm/db/db.py similarity index 100% rename from sijapi/helpers/fromvm/db.py rename to sijapi/helpers/fromvm/db/db.py diff --git a/sijapi/helpers/db_connection_test.py b/sijapi/helpers/fromvm/db/db_connection_test.py similarity index 100% rename from sijapi/helpers/db_connection_test.py rename to sijapi/helpers/fromvm/db/db_connection_test.py diff --git a/sijapi/helpers/fromvm/db_get_schema.py b/sijapi/helpers/fromvm/db/db_get_schema.py similarity index 100% rename from sijapi/helpers/fromvm/db_get_schema.py rename to sijapi/helpers/fromvm/db/db_get_schema.py diff --git a/sijapi/helpers/fromvm/db_uuid_migrate.py b/sijapi/helpers/fromvm/db/db_uuid_migrate.py similarity index 100% rename from sijapi/helpers/fromvm/db_uuid_migrate.py rename to sijapi/helpers/fromvm/db/db_uuid_migrate.py diff --git a/sijapi/helpers/fromvm/db_connection_test.py b/sijapi/helpers/fromvm/db_connection_test.py deleted file mode 100644 index 0d94404..0000000 --- a/sijapi/helpers/fromvm/db_connection_test.py +++ /dev/null @@ -1,66 +0,0 @@ -import asyncio -import asyncpg -import psycopg2 -import sys - -async def try_async_connect(host, port, user, password, database): - try: - conn = await asyncpg.connect( - host=host, - port=port, - user=user, - password=password, - database=database - ) - version = await conn.fetchval('SELECT version()') - print(f"Async connection successful to {host}:{port}") - print(f"PostgreSQL version: {version}") - await conn.close() - return True - except Exception as e: - print(f"Async connection failed to {host}:{port}") - print(f"Error: {str(e)}") - return False - -def try_sync_connect(host, port, user, password, database): - try: - conn = psycopg2.connect( - host=host, - port=port, - user=user, - password=password, - database=database - ) - cur = conn.cursor() - cur.execute('SELECT version()') - version = cur.fetchone()[0] - print(f"Sync connection successful to {host}:{port}") - print(f"PostgreSQL version: {version}") - conn.close() - return True - except Exception as e: - print(f"Sync connection failed to {host}:{port}") - print(f"Error: {str(e)}") - return False - -async def main(): - # Database connection parameters - port = 5432 - user = 'sij' - password = 'Synchr0!' - database = 'sij' - - hosts = ['100.64.64.20', '127.0.0.1', 'localhost'] - - print("Attempting asynchronous connections:") - for host in hosts: - await try_async_connect(host, port, user, password, database) - print() - - print("Attempting synchronous connections:") - for host in hosts: - try_sync_connect(host, port, user, password, database) - print() - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/sijapi/helpers/CalFire_THP_scraper.py b/sijapi/helpers/thp/CalFire_THP_scraper.py similarity index 100% rename from sijapi/helpers/CalFire_THP_scraper.py rename to sijapi/helpers/thp/CalFire_THP_scraper.py diff --git a/sijapi/helpers/thp.py b/sijapi/helpers/thp/thp.py similarity index 100% rename from sijapi/helpers/thp.py rename to sijapi/helpers/thp/thp.py diff --git a/sijapi/helpers/upscaler.py b/sijapi/helpers/upscaler.py deleted file mode 100644 index 4e7ea5d..0000000 --- a/sijapi/helpers/upscaler.py +++ /dev/null @@ -1,34 +0,0 @@ -from aura_sr import AuraSR -from PIL import Image -import torch -import os - -# Set environment variables for MPS -os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0' -os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' - -# Initialize device as CPU for default -device = torch.device('cpu') - -# Check if MPS is available -if torch.backends.mps.is_available(): - if not torch.backends.mps.is_built(): - print("MPS not available because the current PyTorch install was not built with MPS enabled.") - else: - device = torch.device('mps:0') - -# Overwrite the default CUDA device with MPS -torch.cuda.default_stream = device - -aura_sr = AuraSR.from_pretrained("fal-ai/AuraSR").to(device) - -def load_image_from_path(file_path): - return Image.open(file_path) - -def upscale_and_save(original_path): - original_image = load_image_from_path(original_path) - upscaled_image = aura_sr.upscale_4x(original_image) - upscaled_image.save(original_path) - -# Insert your image path -upscale_and_save("/Users/sij/workshop/sijapi/sijapi/testbed/API__00482_ 2.png") diff --git a/sijapi/routers/news.py b/sijapi/routers/news.py index decbb07..6663d65 100644 --- a/sijapi/routers/news.py +++ b/sijapi/routers/news.py @@ -195,24 +195,56 @@ async def process_and_save_article( raise HTTPException(status_code=500, detail=str(e)) + + +from newspaper import Article as NewspaperArticle + async def fetch_and_parse_article(url: str) -> Article: + # Try trafilatura first source = trafilatura.fetch_url(url) - traf = trafilatura.extract_metadata(filecontent=source, default_url=url) - - article = Article(url) - article.set_html(source) - article.parse() - - # Update article properties with trafilatura data - article.title = article.title or traf.title or url - article.authors = article.authors or (traf.author if isinstance(traf.author, list) else [traf.author]) - article.publish_date = await gis.dt(article.publish_date or traf.date or dt_datetime.now(), "UTC") - article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) or article.text - article.top_image = article.top_image or traf.image - article.source_url = traf.sitename or urlparse(url).netloc.replace('www.', '').title() - article.meta_keywords = list(set(article.meta_keywords or traf.categories or traf.tags or [])) - - return article + + if source: + try: + traf = trafilatura.extract_metadata(filecontent=source, default_url=url) + + article = Article(url) + article.set_html(source) + article.parse() + + # Update article properties with trafilatura data + article.title = article.title or traf.title or url + article.authors = article.authors or (traf.author if isinstance(traf.author, list) else [traf.author]) + article.publish_date = await gis.dt(article.publish_date or traf.date or dt_datetime.now(), "UTC") + article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) or article.text + article.top_image = article.top_image or traf.image + article.source_url = traf.sitename or urlparse(url).netloc.replace('www.', '').title() + article.meta_keywords = list(set(article.meta_keywords or traf.categories or traf.tags or [])) + + return article + except Exception as e: + l.warning(f"Trafilatura failed to parse {url}: {str(e)}. Falling back to newspaper3k.") + else: + l.warning(f"Trafilatura failed to fetch {url}. Falling back to newspaper3k.") + + # If trafilatura fails, use newspaper3k + try: + newspaper_article = NewspaperArticle(url) + newspaper_article.download() + newspaper_article.parse() + + article = Article(url) + article.title = newspaper_article.title + article.authors = newspaper_article.authors + article.publish_date = await gis.dt(newspaper_article.publish_date or dt_datetime.now(), "UTC") + article.text = newspaper_article.text + article.top_image = newspaper_article.top_image + article.source_url = urlparse(url).netloc.replace('www.', '').title() + article.meta_keywords = newspaper_article.keywords + + return article + except Exception as e: + l.error(f"Both trafilatura and newspaper3k failed to fetch and parse {url}: {str(e)}") + raise HTTPException(status_code=500, detail="Failed to fetch and parse article content") def generate_markdown_content(article, title: str, summary: str, audio_link: Optional[str], site_name: Optional[str] = None) -> str: @@ -258,4 +290,4 @@ tags: body += f"> [!summary]+\n> {summary}\n\n" body += article.text - return frontmatter + body \ No newline at end of file + return frontmatter + body