Auto-update: Mon Aug 5 17:56:51 PDT 2024

2024-08-05 17:56:51 -07:00 · 2024-08-05 17:56:51 -07:00 · 4f25c3a5fc
commit 4f25c3a5fc
parent 954ad967e9
1 changed file with 29 additions and 25 deletions
--- a/sijapi/utilities.py
+++ b/sijapi/utilities.py
@@ -26,6 +26,10 @@ import ipaddress
 from scipy.spatial import cKDTree
 from dateutil.parser import parse as dateutil_parse
 from docx import Document
 import aiohttp
 from bs4 import BeautifulSoup
 from readability import Document as ReadabilityDocument
 from markdownify import markdownify as md
 from sshtunnel import SSHTunnelForwarder
 from urllib.parse import urlparse
 from fastapi import Depends, HTTPException, Request, UploadFile
@@ -561,7 +565,7 @@ async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]
        return None
    # Use readability to extract the main content
-        doc = Document(html_content)
+    doc = ReadabilityDocument(html_content)
    cleaned_html = doc.summary()
    # Parse the cleaned HTML with BeautifulSoup for any additional processing