#!/usr/bin/env python3
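"""Fetch an article from a URL and print it as formatted Markdown.

Extraction is attempted with trafilatura first, with newspaper3k as a fallback.

Usage: ./n3k <article_url>
"""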

import sys
import asyncio
import trafilatura
from newspaper import Article
from urllib.parse import urlparse
from datetime import datetime
import textwrap

async def fetch_and_parse_article(url: str) -> Article:
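    """Download and parse an article, trying trafilatura first and newspaper3k as a fallback.

    Returns a newspaper `Article` whose fields are backfilled with trafilatura
    metadata when available; raises RuntimeError if both extraction paths fail.
    """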
    # Try trafilatura first
    source = trafilatura.fetch_url(url)
    
    if source:
        try:
            traf = trafilatura.extract_metadata(filecontent=source, default_url=url)
            
            article = Article(url)
            article.set_html(source)
            article.parse()
            
            # Update article properties with trafilatura data
            article.title = article.title or traf.title or url
            # traf.author is a plain string or None, so wrap it rather than assuming a list
            article.authors = article.authors or ([traf.author] if traf.author else [])
            # Prefer trafilatura's date, then newspaper's parsed date, then fall back to now
            article.publish_date = traf.date or article.publish_date or datetime.now()
            article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) or article.text
            article.top_image = article.top_image or traf.image
            article.source_url = traf.sitename or urlparse(url).netloc.replace('www.', '').title()
            
            return article
        except Exception:
            # Any failure in the trafilatura path falls through to the newspaper3k fallback below
            pass
    
    # Fallback to newspaper3k
    try:
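        # Browser-like headers reduce the chance of being blocked as a bot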
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        }
        
        article = Article(url)
        article.config.browser_user_agent = headers['User-Agent']
        article.config.headers = headers
        article.download()
        article.parse()
        
        article.source_url = urlparse(url).netloc.replace('www.', '').title()
        return article
    
    except Exception as e:
        raise RuntimeError(f"Failed to parse article from {url}: {e}") from e

def format_article_markdown(article) -> str:
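    """Render the article as Markdown.

    Emits the title, byline, publish date, and top image when present, followed
    by the body text with each paragraph wrapped to 80 columns.
    """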
    # Format title
    output = f"# {article.title}\n\n"
    
    # Format metadata
    if article.authors:
        authors = article.authors if isinstance(article.authors, list) else [article.authors]
        output += f"*By {', '.join(filter(None, authors))}*\n\n"
    
    if article.publish_date:
        date_str = article.publish_date.strftime("%Y-%m-%d") if isinstance(article.publish_date, datetime) else str(article.publish_date)
        output += f"*Published: {date_str}*\n\n"
    
    if article.top_image:
        output += f"![Article Image]({article.top_image})\n\n"
    
    # Format article text with proper wrapping
    if article.text:
        paragraphs = article.text.split('\n')
        wrapped_paragraphs = []
        
        for paragraph in paragraphs:
            if paragraph.strip():
                wrapped = textwrap.fill(paragraph.strip(), width=80)
                wrapped_paragraphs.append(wrapped)
        
        output += '\n\n'.join(wrapped_paragraphs)
    
    return output

async def main():
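    """Read the article URL from the command line, fetch it, and print the formatted result."""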
    if len(sys.argv) != 2:
        print("Usage: ./n3k <article_url>")
        sys.exit(1)
    
    url = sys.argv[1]
    try:
        article = await fetch_and_parse_article(url)
        formatted_content = format_article_markdown(article)
        print(formatted_content)
    except Exception as e:
        print(f"Error processing article: {str(e)}")
        sys.exit(1)

if __name__ == "__main__":
    asyncio.run(main())