#!/usr/bin/env python3

import argparse
import ast
import http.client
import os
import re
import shutil
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request
import warnings

############################
# Shared Data & Constants
############################

# Top-level names of standard-library modules. Imports of these names are
# never reported as installable dependencies.
BUILTIN_MODULES = {
    'abc', 'aifc', 'argparse', 'array', 'ast', 'asynchat', 'asyncio', 'asyncore', 'atexit',
    'audioop', 'base64', 'bdb', 'binascii', 'binhex', 'bisect', 'builtins', 'bz2', 'calendar',
    'cgi', 'cgitb', 'chunk', 'cmath', 'cmd', 'code', 'codecs', 'codeop', 'collections', 'colorsys',
    'compileall', 'concurrent', 'configparser', 'contextlib', 'copy', 'copyreg', 'crypt', 'csv',
    'ctypes', 'curses', 'dataclasses', 'datetime', 'dbm', 'decimal', 'difflib', 'dis', 'distutils',
    'doctest', 'dummy_threading', 'email', 'encodings', 'ensurepip', 'enum', 'errno', 'faulthandler',
    'fcntl', 'filecmp', 'fileinput', 'fnmatch', 'formatter', 'fractions', 'ftplib', 'functools',
    'gc', 'getopt', 'getpass', 'gettext', 'glob', 'gzip', 'hashlib', 'heapq', 'hmac', 'html', 'http',
    'imaplib', 'imghdr', 'imp', 'importlib', 'inspect', 'io', 'ipaddress', 'itertools', 'json',
    'keyword', 'lib2to3', 'linecache', 'locale', 'logging', 'lzma', 'mailbox', 'mailcap', 'marshal',
    'math', 'mimetypes', 'modulefinder', 'multiprocessing', 'netrc', 'nntplib', 'numbers', 'operator',
    'optparse', 'os', 'ossaudiodev', 'parser', 'pathlib', 'pdb', 'pickle', 'pickletools', 'pipes',
    'pkgutil', 'platform', 'plistlib', 'poplib', 'posix', 'pprint', 'profile', 'pstats', 'pty',
    'pwd', 'py_compile', 'pyclbr', 'pydoc', 'queue', 'quopri', 'random', 're', 'readline',
    'reprlib', 'resource', 'rlcompleter', 'runpy', 'sched', 'secrets', 'select', 'selectors', 'shelve',
    'shlex', 'shutil', 'signal', 'site', 'smtpd', 'smtplib', 'sndhdr', 'socket', 'socketserver',
    'spwd', 'sqlite3', 'ssl', 'stat', 'statistics', 'string', 'stringprep', 'struct', 'subprocess',
    'sunau', 'symtable', 'sys', 'sysconfig', 'syslog', 'tabnanny', 'tarfile', 'telnetlib', 'tempfile',
    'termios', 'test', 'textwrap', 'threading', 'time', 'timeit', 'token', 'tokenize', 'trace',
    'traceback', 'tracemalloc', 'tty', 'turtle', 'types', 'typing', 'unicodedata', 'unittest',
    'urllib', 'uu', 'uuid', 'venv', 'warnings', 'wave', 'weakref', 'webbrowser', 'xdrlib', 'xml',
    'xmlrpc', 'zipapp', 'zipfile', 'zipimport', 'zlib'
}

# Standard-library modules that the baseline above omits. Most importantly
# '__future__': without it, `from __future__ import annotations` would be
# reported as a third-party package.
BUILTIN_MODULES.update({
    '__future__', 'contextvars', 'grp', 'mmap', 'msvcrt', 'ntpath', 'posixpath',
    'tkinter', 'winreg', 'winsound', 'wsgiref',
})

# Python 3.10+ exposes the authoritative list for the running interpreter;
# merge it in when available so newer stdlib modules are recognised too.
BUILTIN_MODULES.update(getattr(sys, 'stdlib_module_names', ()))

# Maps a top-level import name to the name of the package to install when
# the two differ (e.g. `import yaml` is provided by the `pyyaml` package).
# Names not listed here are installed under their import name.
KNOWN_CORRECTIONS = {
    'bs4': 'beautifulsoup4',
    'cv2': 'opencv-python',
    'dateutil': 'python-dateutil',
    'dotenv': 'python-dotenv',
    'docx': 'python-docx',
    'tesseract': 'pytesseract',
    'magic': 'python-magic',
    'multipart': 'python-multipart',
    'newspaper': 'newspaper3k',
    'serial': 'pyserial',
    'skimage': 'scikit-image',
    'sklearn': 'scikit-learn',
    'srtm': 'elevation',
    'yaml': 'pyyaml',
    'zoneinfo': 'backports.zoneinfo'
}

# Import names that are never reported as dependencies (presumably because
# they usually refer to project-local packages rather than installable ones).
EXCLUDED_NAMES = {
    'convert',
    'data',
    'example',
    'models',
    'tests',
}

############################
# Environment & Installation
############################

def run_command(command):
    """
    Run `command` (an argv-style list) and capture its output.

    Returns a tuple `(returncode, stdout, stderr)` where both streams are
    decoded to text. Bytes that are not valid UTF-8 are replaced rather than
    raising, so unusual tool output cannot crash the caller.

    If the program cannot be started at all (e.g. it is not installed), no
    exception is raised; instead `(127, "", <error message>)` is returned so
    callers can treat it like any other failed command.
    """
    try:
        completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as exc:
        # 127 mirrors the shell's "command not found" exit status.
        return 127, "", str(exc)
    return (
        completed.returncode,
        completed.stdout.decode(errors="replace"),
        completed.stderr.decode(errors="replace"),
    )

def which(cmd):
    """
    Check if `cmd` is on PATH. Returns True if found, else False.

    Delegates to `shutil.which`, which only accepts executable regular files
    (a directory that happens to share the command's name does not count)
    and copes with PATH being unset.
    """
    return shutil.which(cmd) is not None

def in_conda_env():
    """
    Report whether the process appears to run inside a conda environment.

    The only signal consulted is the CONDA_DEFAULT_ENV environment variable:
    the result is True whenever it is set (even to an empty string).
    """
    active_env = os.environ.get("CONDA_DEFAULT_ENV")
    return active_env is not None

def _normalized_package_name(name):
    """
    Normalize a package name the way PEP 503 does: lowercase, with runs of
    '-', '_' and '.' collapsed to a single '-'.
    """
    return re.sub(r"[-_.]+", "-", name).lower()

def _listing_has_package(listing, package):
    """
    Return True if `listing` (the text printed by `pip list` or
    `conda/mamba list`) has a row whose first column names `package`.
    """
    wanted = _normalized_package_name(package)
    for line in listing.splitlines():
        if line.startswith('#'):
            # conda/mamba print their header as comment lines.
            continue
        columns = line.split()
        if columns and _normalized_package_name(columns[0]) == wanted:
            return True
    return False

def is_package_installed(package, skip_conda=False):
    """
    Checks if 'package' is installed in either mamba/conda (if in conda env and skip_conda=False) or pip.

    Names are compared in normalized form, so e.g. 'pyyaml' matches the
    'PyYAML' row printed by pip and 'typing_extensions' matches
    'typing-extensions'.

    Returns True if the package is listed by the conda-style tool or by pip,
    else False. A tool that fails to run is treated as "not listed".
    """
    if not skip_conda and in_conda_env():
        # Prefer mamba; only consult conda when mamba is not available.
        manager = next((tool for tool in ("mamba", "conda") if which(tool)), None)
        if manager is not None:
            returncode, stdout, _ = run_command([manager, "list"])
            if returncode == 0 and _listing_has_package(stdout, package):
                return True

    # Fall back to pip
    returncode, stdout, _ = run_command(["pip", "list"])
    return returncode == 0 and _listing_has_package(stdout, package)

def install_package(package, skip_conda=False):
    """
    Installs 'package'.
      1) If skip_conda=False and in conda env -> try mamba, then conda
         (both from the conda-forge channel)
      2) Fallback to pip

    Nothing is done when the package is already installed. Progress and the
    final outcome are printed to stdout; if the last-resort pip install
    fails, pip's error output is forwarded to stderr so the failure can be
    diagnosed. Returns None in every case.
    """
    if is_package_installed(package, skip_conda=skip_conda):
        print(f"Package '{package}' is already installed.")
        return

    if not skip_conda and in_conda_env():
        # Each entry: (tool to try, what we fall back to if it fails).
        for tool, fallback in (("mamba", "conda/pip"), ("conda", "pip")):
            if not which(tool):
                continue
            print(f"Installing '{package}' with {tool}...")
            returncode, _, _ = run_command([tool, "install", "-y", "-c", "conda-forge", package])
            if returncode == 0:
                print(f"Successfully installed '{package}' via {tool}.")
                return
            print(f"{tool} failed for '{package}', falling back to {fallback}...")

    # fallback: pip
    print(f"Installing '{package}' with pip...")
    returncode, _, stderr = run_command(["pip", "install", package])
    if returncode == 0:
        print(f"Successfully installed '{package}' via pip.")
        return

    print(f"Failed to install package '{package}'.")
    details = stderr.strip()
    if details:
        # Surface the installer's own explanation instead of discarding it.
        print(details, file=sys.stderr)

############################
# Parsing Python Imports
############################

def process_requirements_file(file_path):
    """
    Read a pip-style requirements file and return its requirements as a set.

    Blank lines and comments are dropped. A '#' starts a comment when it is
    at the beginning of a line or preceded by whitespace, so inline comments
    are removed while URL fragments such as '#egg=name' are kept.

    Option lines (anything starting with '-', e.g. '-r other.txt', '-e .',
    '--index-url ...') are not package names; they are skipped with a notice
    instead of being handed to the installers as if they were packages.
    """
    packages = set()
    # utf-8-sig transparently drops a leading byte-order mark if present.
    with open(file_path, 'r', encoding='utf-8-sig', errors='replace') as file:
        for raw_line in file:
            line = re.sub(r'(^|\s+)#.*$', '', raw_line.strip()).strip()
            if not line:
                continue
            if line.startswith('-'):
                print(f"Skipping unsupported requirements option: {line}")
                continue
            packages.add(line)
    return packages

# Fallback patterns, used only when a file cannot be parsed as Python source.
_FROM_STATEMENT_RE = re.compile(r'\s*from\s+([A-Za-z_]\w*(?:\.\w+)*)\s+import\b')
_IMPORT_STATEMENT_RE = re.compile(r'\s*import\s+(.+)')

def _modules_from_ast(content):
    """
    Return the top-level module names imported anywhere in `content`,
    found by parsing it as Python source. Relative imports are ignored.
    Raises SyntaxError/ValueError if the source cannot be parsed.
    """
    with warnings.catch_warnings():
        # Scanned files may trigger SyntaxWarnings (e.g. bad escapes); they
        # are irrelevant to import discovery.
        warnings.simplefilter("ignore")
        tree = ast.parse(content)

    modules = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            modules.update(alias.name.split('.', 1)[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # level > 0 means a relative import (from . / from .pkg).
            if node.level == 0 and node.module:
                modules.add(node.module.split('.', 1)[0])
    return modules

def _modules_from_lines(content):
    """
    Best-effort, line-based scan for imported top-level module names.
    Used for files that are not valid Python for this interpreter.
    """
    modules = set()
    for raw_line in content.splitlines():
        code = raw_line.split('#', 1)[0]  # drop trailing comments
        for statement in code.split(';'):
            from_match = _FROM_STATEMENT_RE.match(statement)
            if from_match:
                modules.add(from_match.group(1).split('.', 1)[0])
                continue
            import_match = _IMPORT_STATEMENT_RE.match(statement)
            if not import_match:
                continue
            for clause in import_match.group(1).split(','):
                words = clause.split()  # "pkg.sub as alias" -> ["pkg.sub", "as", "alias"]
                if not words:
                    continue
                name = words[0].split('.', 1)[0]
                if name.isidentifier():
                    modules.add(name)
    return modules

def process_python_file(file_path):
    """
    Return a set of external imports (not built-in or excluded).
    Applies known corrections to recognized package names.

    The file is parsed with `ast`, so text inside comments, strings and
    docstrings is never mistaken for an import, and multi-statement or
    multi-line imports are handled. If the file is not parseable by the
    running interpreter, a stricter line-based scan is used instead.

    Names that are all uppercase, listed in EXCLUDED_NAMES, or listed in
    BUILTIN_MODULES are dropped; remaining names are mapped through
    KNOWN_CORRECTIONS.
    """
    # utf-8-sig drops a leading byte-order mark, which would otherwise make
    # the source unparseable.
    with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as f:
        content = f.read()

    try:
        modules = _modules_from_ast(content)
    except (SyntaxError, ValueError, RecursionError):
        modules = _modules_from_lines(content)

    return {
        KNOWN_CORRECTIONS.get(mod, mod)
        for mod in modules
        if mod
        and not mod.isupper()
        and mod not in EXCLUDED_NAMES
        and mod not in BUILTIN_MODULES
    }

# Directory names that never contain the project's own source code.
_SKIPPED_DIR_NAMES = frozenset({'__pycache__', 'node_modules', 'site-packages'})

def _should_descend(parent, name):
    """
    Decide whether a recursive scan should enter sub-directory `name` of
    `parent`. Hidden directories, known non-source directories, and Python
    environments (virtualenvs marked by 'pyvenv.cfg', conda environments
    marked by a 'conda-meta' directory) are skipped.
    """
    if name.startswith('.') or name in _SKIPPED_DIR_NAMES:
        return False
    candidate = os.path.join(parent, name)
    is_virtualenv = os.path.isfile(os.path.join(candidate, 'pyvenv.cfg'))
    is_conda_env = os.path.isdir(os.path.join(candidate, 'conda-meta'))
    return not (is_virtualenv or is_conda_env)

def find_imports_in_path(path, recurse=False):
    """
    Finds Python imports in the specified path. If path is a file, parse that file;
    if path is a dir, parse .py files in that dir. Recurse subdirs if 'recurse=True'.

    When recursing, sub-directories that hold installed third-party code or
    tooling data (virtualenvs, conda environments, site-packages,
    __pycache__, node_modules, hidden directories such as .git) are not
    entered, so their imports are not mistaken for the project's own
    dependencies. The directory given as `path` itself is always scanned.

    Returns a set of package names; an empty set (after printing a notice)
    if the path does not exist or is a non-.py file.
    """
    imports = set()
    if not os.path.exists(path):
        print(f"Warning: Path does not exist: {path}")
        return imports

    if os.path.isfile(path):
        if path.endswith('.py'):
            imports.update(process_python_file(path))
        else:
            print(f"Skipping non-Python file: {path}")
        return imports

    # Directory:
    if recurse:
        for root, dirnames, filenames in os.walk(path):
            # Pruning in place tells os.walk not to visit these directories.
            dirnames[:] = [d for d in dirnames if _should_descend(root, d)]
            for fn in filenames:
                if fn.endswith('.py'):
                    imports.update(process_python_file(os.path.join(root, fn)))
    else:
        for fn in os.listdir(path):
            fullpath = os.path.join(path, fn)
            if os.path.isfile(fullpath) and fn.endswith('.py'):
                imports.update(process_python_file(fullpath))

    return imports

############################
# PyPI Availability Check
############################

def check_library_on_pypi(library):
    """
    Returns True if 'library' is on PyPI, else False.
    Using urllib to avoid external dependencies.

    The check requests the project's JSON metadata from pypi.org with a
    5-second timeout. This is a best-effort probe: an empty name, an HTTP
    error (such as 404), a network failure, a timeout, or a malformed
    response all yield False rather than raising.
    """
    name = library.strip() if isinstance(library, str) else ""
    if not name:
        # Nothing to look up; avoid a pointless network round-trip.
        return False

    # Quote the name so that stray characters (spaces, slashes, ...) cannot
    # produce an invalid or unintended URL.
    url = f"https://pypi.org/pypi/{urllib.parse.quote(name, safe='')}/json"
    try:
        with urllib.request.urlopen(url, timeout=5) as resp:
            # HTTP 200 => library is found
            return resp.status == 200
    except (OSError, http.client.HTTPException, ValueError):
        # OSError covers URLError/HTTPError as well as raw socket timeouts
        # and connection resets raised while reading the response.
        return False

############################
# Subcommand: install
############################

def subcmd_install(args):
    """
    Handle `deps install`.

    `--no-conda` may appear anywhere in `args`; it disables mamba/conda and
    is removed before the remaining arguments are interpreted.

    Two modes:
      * No remaining arguments, or just `-r`: scan the current directory
        (recursively for `-r`) for imports, keep those found on PyPI, and
        install them.
      * Anything else: each argument is one of
          - `-r <file>`  : install everything listed in a requirements file
          - `<name>.py`  : install the imports found in that script
          - anything else: a package name to install directly
    """
    # Pull out the --no-conda switch so it is not mistaken for a package.
    skip_conda = '--no-conda' in args
    remaining = [token for token in args if token != '--no-conda']

    # Scan mode: `deps install` or `deps install -r`.
    if remaining in ([], ['-r']):
        discovered = find_imports_in_path('.', recurse=bool(remaining))
        if not discovered:
            print("No imports found in current directory.")
            return

        # Keep only the names that PyPI knows about.
        available = []
        for candidate in sorted(discovered):
            if not check_library_on_pypi(candidate):
                print(f"Skipping '{candidate}' (not found on PyPI).")
                continue
            available.append(candidate)

        if not available:
            print("No PyPI-available packages found to install.")
            return

        print("Installing packages:", ', '.join(available))
        for name in available:
            install_package(name, skip_conda=skip_conda)
        return

    # Explicit mode: package names, scripts, and `-r <requirements file>`.
    wanted = set()
    tokens = iter(remaining)
    for token in tokens:
        if token == '-r':
            # The requirements file path is the argument right after -r.
            req_file = next(tokens, None)
            if req_file is None:
                print("Error: -r requires a file path.")
                return
            if os.path.isfile(req_file):
                wanted |= process_requirements_file(req_file)
            else:
                print(f"Requirements file not found: {req_file}")
        elif token.endswith('.py'):
            # A script: install whatever it imports.
            if os.path.isfile(token):
                wanted |= process_python_file(token)
            else:
                print(f"File not found: {token}")
        else:
            # Plain package name.
            wanted.add(token)

    for name in sorted(wanted):
        install_package(name, skip_conda=skip_conda)
############################
# Subcommand: ls
############################

def subcmd_ls(args):
    """
    Handle `deps ls`: print the external imports found in a file or folder.

    `args` may contain `-r`/`--recurse` and an optional path (default: the
    current directory). Arguments that are not recognised are ignored.
    """
    ls_parser = argparse.ArgumentParser(prog='deps ls', add_help=False)
    ls_parser.add_argument(
        '-r', '--recurse',
        action='store_true',
        help='Recurse into subfolders.',
    )
    ls_parser.add_argument(
        'path',
        nargs='?',
        default='.',
        help='File or directory to scan. Default is current directory.',
    )
    options, _ignored = ls_parser.parse_known_args(args)
    target = options.path

    # Bail out early when there is nothing to scan.
    if not os.path.exists(target):
        print(f"Error: Path does not exist: {target}")
        return

    found = find_imports_in_path(target, recurse=options.recurse)
    if not found:
        print("No Python imports found (or none that require external packages).")
        return

    print("Imports found:")
    for name in sorted(found):
        print(f" - {name}")


############################
# Main
############################

def main(argv=None):
    """
    Command-line entry point.

    `argv` is the argument list without the program name; it defaults to
    `sys.argv[1:]`.

    The argparse parser below provides top-level help, subcommand help and
    error reporting. The arguments that follow a subcommand are, however,
    forwarded to the handler untouched instead of going through argparse:
    a `nargs=argparse.REMAINDER` positional does not capture a leading
    option-like argument, so `deps install -r`, `deps install --no-conda pkg`
    and `deps ls -r` would otherwise be rejected as "unrecognized arguments".
    """
    argv = list(sys.argv[1:] if argv is None else argv)

    parser = argparse.ArgumentParser(description='deps - Manage and inspect Python dependencies.')
    subparsers = parser.add_subparsers(dest='subcommand', required=True)

    # Subcommand: install
    install_parser = subparsers.add_parser('install',
        help="Install packages or dependencies from .py files / current folder / subfolders.")
    install_parser.add_argument('args', nargs=argparse.REMAINDER,
        help=(
            "If empty, scans current dir. If '-r' only, scans recursively. "
            "Otherwise, pass script names, package names, or '-r <file>'. "
            "Use --no-conda to skip mamba/conda usage."
        ))

    # Subcommand: ls
    ls_parser = subparsers.add_parser('ls',
        help="List imports in a file/folder. Use '-r' to recurse.")
    ls_parser.add_argument('ls_args', nargs=argparse.REMAINDER,
        help="Optional '-r' plus path (file/folder).")

    wants_help = argv[1:2] in (['-h'], ['--help'])
    if argv and argv[0] in ('install', 'ls') and not wants_help:
        # Known subcommand: hand its arguments over verbatim.
        subcommand, rest = argv[0], argv[1:]
    else:
        # Help requests, a missing/unknown subcommand, or unusual input:
        # let argparse print help or the appropriate error and exit.
        parsed_args = parser.parse_args(argv)
        subcommand = parsed_args.subcommand
        rest = parsed_args.args if subcommand == 'install' else parsed_args.ls_args

    if subcommand == 'install':
        subcmd_install(rest)
    elif subcommand == 'ls':
        subcmd_ls(rest)

if __name__ == "__main__":
    main()