#!/usr/bin/env python3
"""
deps - install and inspect the third-party dependencies of Python code.

Usage:
    deps install [--no-conda]                scan *.py in the current directory
    deps install [--no-conda] -r             same, but recurse into subfolders
    deps install [--no-conda] ITEM...        ITEM is a package name, a .py
                                             script, or '-r REQUIREMENTS_FILE'
    deps ls [-r] [PATH]                      list external imports under PATH
"""

import argparse
import ast
import http.client
import os
import re
import shutil
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request
import warnings

############################
# Shared Data & Constants
############################

BUILTIN_MODULES = {
    'abc', 'aifc', 'argparse', 'array', 'ast', 'asynchat', 'asyncio', 'asyncore', 'atexit',
    'audioop', 'base64', 'bdb', 'binascii', 'binhex', 'bisect', 'builtins', 'bz2', 'calendar',
    'cgi', 'cgitb', 'chunk', 'cmath', 'cmd', 'code', 'codecs', 'codeop', 'collections', 'colorsys',
    'compileall', 'concurrent', 'configparser', 'contextlib', 'copy', 'copyreg', 'crypt', 'csv',
    'ctypes', 'curses', 'dataclasses', 'datetime', 'dbm', 'decimal', 'difflib', 'dis', 'distutils',
    'doctest', 'dummy_threading', 'email', 'encodings', 'ensurepip', 'enum', 'errno', 'faulthandler',
    'fcntl', 'filecmp', 'fileinput', 'fnmatch', 'formatter', 'fractions', 'ftplib', 'functools',
    'gc', 'getopt', 'getpass', 'gettext', 'glob', 'gzip', 'hashlib', 'heapq', 'hmac', 'html', 'http',
    'imaplib', 'imghdr', 'imp', 'importlib', 'inspect', 'io', 'ipaddress', 'itertools', 'json',
    'keyword', 'lib2to3', 'linecache', 'locale', 'logging', 'lzma', 'mailbox', 'mailcap', 'marshal',
    'math', 'mimetypes', 'modulefinder', 'multiprocessing', 'netrc', 'nntplib', 'numbers', 'operator',
    'optparse', 'os', 'ossaudiodev', 'parser', 'pathlib', 'pdb', 'pickle', 'pickletools', 'pipes',
    'pkgutil', 'platform', 'plistlib', 'poplib', 'posix', 'pprint', 'profile', 'pstats', 'pty',
    'pwd', 'py_compile', 'pyclbr', 'pydoc', 'queue', 'quopri', 'random', 're', 'readline',
    'reprlib', 'resource', 'rlcompleter', 'runpy', 'sched', 'secrets', 'select', 'selectors', 'shelve',
    'shlex', 'shutil', 'signal', 'site', 'smtpd', 'smtplib', 'sndhdr', 'socket', 'socketserver',
    'spwd', 'sqlite3', 'ssl', 'stat', 'statistics', 'string', 'stringprep', 'struct', 'subprocess',
    'sunau', 'symtable', 'sys', 'sysconfig', 'syslog', 'tabnanny', 'tarfile', 'telnetlib', 'tempfile',
    'termios', 'test', 'textwrap', 'threading', 'time', 'timeit', 'token', 'tokenize', 'trace',
    'traceback', 'tracemalloc', 'tty', 'turtle', 'types', 'typing', 'unicodedata', 'unittest',
    'urllib', 'uu', 'uuid', 'venv', 'warnings', 'wave', 'weakref', 'webbrowser', 'xdrlib', 'xml',
    'xmlrpc', 'zipapp', 'zipfile', 'zipimport', 'zlib'
}
# The hand-written list above is incomplete (it has no '__future__',
# 'contextvars', 'mmap', 'tkinter', ...), which made those modules look like
# installable packages. Add the pseudo-modules explicitly and, where the
# running interpreter provides it (Python 3.10+), its authoritative list.
BUILTIN_MODULES |= {'__future__', '__main__'}
BUILTIN_MODULES |= set(getattr(sys, 'stdlib_module_names', ()))

# Import name -> name of the distribution that provides it.
KNOWN_CORRECTIONS = {
    'dateutil': 'python-dateutil',
    'dotenv': 'python-dotenv',
    'docx': 'python-docx',
    'tesseract': 'pytesseract',
    'magic': 'python-magic',
    'multipart': 'python-multipart',
    'newspaper': 'newspaper3k',
    'srtm': 'elevation',
    'yaml': 'pyyaml',
    'zoneinfo': 'backports.zoneinfo'
}

# Import names that are never reported as external packages.
EXCLUDED_NAMES = {'models', 'data', 'convert', 'example', 'tests'}

# Directory names pruned from recursive scans (hidden directories are pruned
# as well). They hold environments / vendored code, not the project's sources.
_SKIPPED_DIRS = {'__pycache__', 'node_modules', 'site-packages', 'venv'}

# Leading project name of a requirement string such as "requests>=2.0".
_PROJECT_NAME = re.compile(r'\s*([A-Za-z0-9][A-Za-z0-9._-]*)')
# A '#' starts a comment in a requirements file at line start or after whitespace.
_REQUIREMENT_COMMENT = re.compile(r'(?:^|\s)#')
# Patterns used by the line-based fallback import scanner.
_FROM_LINE = re.compile(r'^\s*from\s+([A-Za-z_][\w.]*)\s+import\b')
_IMPORT_LINE = re.compile(r'^\s*import\s+(.+)$')
_MODULE_PATH = re.compile(r'\s*([A-Za-z_][\w.]*)')

############################
# Environment & Installation
############################

def run_command(command):
    """
    Run `command` (an argv list, no shell) and wait for it to finish.

    Returns a `(returncode, stdout, stderr)` tuple with both streams decoded
    to text; undecodable bytes are replaced instead of raising.
    """
    completed = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False
    )
    return (
        completed.returncode,
        completed.stdout.decode(errors='replace'),
        completed.stderr.decode(errors='replace'),
    )

def which(cmd):
    """
    Check if `cmd` is on PATH. Returns True if found, else False.
    """
    # shutil.which ignores directories and copes with an unset PATH, both of
    # which a hand-rolled os.access() scan gets wrong.
    return shutil.which(cmd) is not None

def in_conda_env():
    """
    Returns True if we appear to be in a conda environment,
    as indicated by the CONDA_DEFAULT_ENV environment variable.
    """
    return "CONDA_DEFAULT_ENV" in os.environ

def _pip_command(*pip_args):
    """Build a pip argv bound to the interpreter that is running this script."""
    return [sys.executable or 'python3', '-m', 'pip', *pip_args]

def _canonical_name(requirement):
    """
    Return the project name at the start of `requirement`, lower-cased and
    with runs of '-', '_' and '.' collapsed to '-' (PEP 503 normalisation).
    """
    match = _PROJECT_NAME.match(requirement)
    name = match.group(1) if match else requirement.strip()
    return re.sub(r'[-_.]+', '-', name).lower()

def _is_listed(package, listing):
    """
    Return True if `package` appears as the first token of a line of `listing`.

    Works for `conda list` rows ("name  version  build ...") and for
    `pip list --format=freeze` rows ("name==version").
    """
    wanted = _canonical_name(package)
    for row in listing.splitlines():
        columns = row.split()
        if columns and _canonical_name(columns[0]) == wanted:
            return True
    return False

def is_package_installed(package, skip_conda=False):
    """
    Checks if 'package' is installed in either mamba/conda (if in conda env and skip_conda=False) or pip.

    `package` may carry a version specifier ("requests>=2.0"); only the
    project name is compared, ignoring case and '-'/'_'/'.' differences.
    """
    if not skip_conda and in_conda_env():
        # Prefer mamba; only consult conda when mamba is not available.
        tool = next((name for name in ("mamba", "conda") if which(name)), None)
        if tool is not None:
            returncode, stdout, _ = run_command([tool, "list"])
            if returncode == 0 and _is_listed(package, stdout):
                return True

    # Fall back to pip
    returncode, stdout, _ = run_command(_pip_command("list", "--format=freeze"))
    return returncode == 0 and _is_listed(package, stdout)

def install_package(package, skip_conda=False):
    """
    Installs 'package'.
      1) If skip_conda=False and in conda env -> try mamba, then conda
      2) Fallback to pip

    Progress is reported on stdout; nothing is returned. When the final pip
    attempt fails, pip's error output is echoed to stderr.
    """
    if is_package_installed(package, skip_conda=skip_conda):
        print(f"Package '{package}' is already installed.")
        return

    if not skip_conda and in_conda_env():
        for tool, next_step in (("mamba", "conda/pip"), ("conda", "pip")):
            if not which(tool):
                continue
            print(f"Installing '{package}' with {tool}...")
            returncode, _, _ = run_command(
                [tool, "install", "-y", "-c", "conda-forge", package]
            )
            if returncode == 0:
                print(f"Successfully installed '{package}' via {tool}.")
                return
            print(f"{tool} failed for '{package}', falling back to {next_step}...")

    # fallback: pip
    print(f"Installing '{package}' with pip...")
    returncode, _, stderr = run_command(_pip_command("install", package))
    if returncode != 0:
        print(f"Failed to install package '{package}'.")
        if stderr.strip():
            print(stderr.strip(), file=sys.stderr)
    else:
        print(f"Successfully installed '{package}' via pip.")

############################
# Parsing Python Imports
############################

def process_requirements_file(file_path):
    """
    Return the set of requirement strings listed in a requirements file.

    Blank lines, comment lines, inline comments (whitespace followed by '#')
    and option lines ('-r other.txt', '-e .', '--index-url ...') are skipped,
    because none of them is a package that can be handed to an installer.
    """
    packages = set()
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
        for raw_line in handle:
            entry = _REQUIREMENT_COMMENT.split(raw_line, maxsplit=1)[0].strip()
            if entry and not entry.startswith('-'):
                packages.add(entry)
    return packages

def _modules_from_ast(tree):
    """Yield the dotted module path of every absolute import in `tree`."""
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                yield alias.name
        elif isinstance(node, ast.ImportFrom):
            # level > 0 is a relative import, i.e. project-local code.
            if node.level == 0 and node.module:
                yield node.module

def _modules_from_lines(source):
    """
    Yield module paths found by scanning `source` line by line.

    Fallback for files that do not parse (e.g. Python 2 code); less precise
    than the AST route since it cannot tell code from string contents.
    """
    for line in source.splitlines():
        code = line.split('#', 1)[0]
        from_match = _FROM_LINE.match(code)
        if from_match:
            yield from_match.group(1)
            continue
        import_match = _IMPORT_LINE.match(code)
        if import_match:
            for part in import_match.group(1).split(','):
                module_match = _MODULE_PATH.match(part)
                if module_match:
                    yield module_match.group(1)

def _external_name(module):
    """
    Map a dotted module path to the package to install, or None when its
    top-level name is standard library, excluded, or all upper-case.
    """
    top_level = module.split('.')[0].strip()
    if (not top_level or top_level.isupper()
            or top_level in EXCLUDED_NAMES or top_level in BUILTIN_MODULES):
        return None
    return KNOWN_CORRECTIONS.get(top_level, top_level)

def process_python_file(file_path):
    """
    Return a set of external imports (not built-in or excluded).
    Applies known corrections to recognized package names.

    The file is parsed with `ast`, so comments, docstrings, multi-line and
    relative imports are handled correctly; if it cannot be parsed, a
    line-based scan is used instead.
    """
    # utf-8-sig drops a leading BOM that would otherwise hide the first import.
    with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as handle:
        source = handle.read()

    try:
        with warnings.catch_warnings():
            # Scanned code may trigger SyntaxWarnings; they are not our concern.
            warnings.simplefilter('ignore')
            tree = ast.parse(source, filename=file_path)
    except (SyntaxError, ValueError, RecursionError):
        modules = _modules_from_lines(source)
    else:
        modules = _modules_from_ast(tree)

    imports = set()
    for module in modules:
        package = _external_name(module)
        if package:
            imports.add(package)
    return imports

def _python_files(directory, recurse):
    """Yield paths of the .py files in `directory` (and below, if `recurse`)."""
    if not recurse:
        for name in sorted(os.listdir(directory)):
            full_path = os.path.join(directory, name)
            if name.endswith('.py') and os.path.isfile(full_path):
                yield full_path
        return

    for root, dirnames, filenames in os.walk(directory):
        # Prune in place so os.walk never descends into these directories.
        dirnames[:] = [
            d for d in dirnames
            if d not in _SKIPPED_DIRS and not d.startswith('.')
        ]
        for name in filenames:
            if name.endswith('.py'):
                yield os.path.join(root, name)

def find_imports_in_path(path, recurse=False):
    """
    Finds Python imports in the specified path. If path is a file, parse that file;
    if path is a dir, parse .py files in that dir. Recurse subdirs if 'recurse=True'.

    Recursive scans skip hidden directories and environment / cache folders
    (see _SKIPPED_DIRS). Files that cannot be read are reported and skipped.
    Returns a set of package names; empty if the path does not exist.
    """
    imports = set()
    if not os.path.exists(path):
        print(f"Warning: Path does not exist: {path}")
        return imports

    if os.path.isfile(path):
        if path.endswith('.py'):
            imports.update(process_python_file(path))
        else:
            print(f"Skipping non-Python file: {path}")
        return imports

    # Directory:
    for file_path in _python_files(path, recurse):
        try:
            imports.update(process_python_file(file_path))
        except OSError as exc:
            print(f"Warning: could not read {file_path}: {exc}")

    return imports

############################
# PyPI Availability Check
############################

def check_library_on_pypi(library):
    """
    Returns True if 'library' is on PyPI, else False.
    Using urllib to avoid external dependencies.

    Any network or HTTP failure is reported as False, so "not found" and
    "PyPI unreachable" are indistinguishable to the caller.
    """
    # Quote the name so odd characters cannot produce an invalid URL.
    url = f"https://pypi.org/pypi/{urllib.parse.quote(library, safe='')}/json"
    try:
        with urllib.request.urlopen(url, timeout=5) as resp:
            # HTTP 200 => library is found
            return resp.status == 200
    except (OSError, http.client.HTTPException, ValueError):
        # OSError covers URLError/HTTPError as well as socket timeouts.
        return False

############################
# Subcommand: install
############################

def subcmd_install(args):
    """
    If user typed exactly `deps install` or `deps install -r`, we gather imports from current dir,
    check PyPI availability, and install them with conda/mamba/pip (unless --no-conda given).

    Otherwise we do the normal logic of:
      - installing from a requirements file with `-r req.txt`
      - installing from a .py file
      - installing direct package names

    `args` is the list of command-line tokens that follow `install`.
    """
    # --no-conda may appear anywhere; strip it so it is not taken for a package.
    skip_conda = '--no-conda' in args
    args = [arg for arg in args if arg != '--no-conda']

    # If user typed exactly: deps install (no arguments) or deps install -r (only that arg)
    if not args or args == ['-r']:
        imports_found = find_imports_in_path('.', recurse=(args == ['-r']))
        if not imports_found:
            print("No imports found in current directory.")
            return
        # Keep only the names that actually exist on PyPI
        to_install = []
        for lib in sorted(imports_found):
            if check_library_on_pypi(lib):
                to_install.append(lib)
            else:
                print(f"Skipping '{lib}' (not found on PyPI).")
        if not to_install:
            print("No PyPI-available packages found to install.")
            return
        print("Installing packages:", ', '.join(to_install))
        for pkg in to_install:
            install_package(pkg, skip_conda=skip_conda)
        return

    # Otherwise, normal logic with direct packages, .py files, or -r <filename>
    packages_to_install = set()
    remaining = iter(args)
    for arg in remaining:
        if arg == '-r':
            # next arg is a requirements file
            req_file = next(remaining, None)
            if req_file is None:
                print("Error: -r requires a file path.")
                return
            if os.path.isfile(req_file):
                packages_to_install.update(process_requirements_file(req_file))
            else:
                print(f"Requirements file not found: {req_file}")
        elif arg.endswith('.py'):
            # parse imports from that script
            if os.path.isfile(arg):
                packages_to_install.update(process_python_file(arg))
            else:
                print(f"File not found: {arg}")
        else:
            # treat as a direct package name
            packages_to_install.add(arg)

    # Install all packages in packages_to_install
    for pkg in sorted(packages_to_install):
        install_package(pkg, skip_conda=skip_conda)

############################
# Subcommand: ls
############################

def subcmd_ls(args):
    """
    Print the external imports found under a path.

    `args` is the list of tokens that follow `ls`: an optional -r/--recurse
    flag and an optional file or directory (default: current directory).
    Unrecognised tokens are ignored.
    """
    parser = argparse.ArgumentParser(prog='deps ls', add_help=False)
    parser.add_argument('-r', '--recurse', action='store_true',
                        help='Recurse into subfolders.')
    parser.add_argument('path', nargs='?', default='.',
                        help='File or directory to scan. Default is current directory.')
    known_args, _ = parser.parse_known_args(args)

    imports = find_imports_in_path(known_args.path, recurse=known_args.recurse)
    if imports:
        print("Imports found:")
        for imp in sorted(imports):
            print(f"  - {imp}")
    else:
        print("No Python imports found (or none that require external packages).")

############################
# Main
############################

def _build_parser():
    """Create the top-level parser; it supplies --help output and usage errors."""
    parser = argparse.ArgumentParser(description='deps - Manage and inspect Python dependencies.')
    subparsers = parser.add_subparsers(dest='subcommand', required=True)

    # Subcommand: install
    install_parser = subparsers.add_parser(
        'install',
        help="Install packages or dependencies from .py files / current folder / subfolders.")
    install_parser.add_argument(
        'args', nargs=argparse.REMAINDER,
        help=(
            "If empty, scans current dir. If '-r' only, scans recursively. "
            "Otherwise, pass script names, package names, or '-r <file>'. "
            "Use --no-conda to skip mamba/conda usage."
        ))

    # Subcommand: ls
    ls_parser = subparsers.add_parser(
        'ls',
        help="List imports in a file/folder. Use '-r' to recurse.")
    ls_parser.add_argument(
        'ls_args', nargs=argparse.REMAINDER,
        help="Optional '-r' plus path (file/folder).")

    return parser

def main(argv=None):
    """
    Command-line entry point.

    `argv` is the argument list without the program name; it defaults to
    `sys.argv[1:]`.
    """
    argv = sys.argv[1:] if argv is None else list(argv)
    parser = _build_parser()
    handlers = {'install': subcmd_install, 'ls': subcmd_ls}

    # Dispatch by hand: argparse rejects an option-like token that comes
    # first in a REMAINDER positional ("deps install -r", "deps ls -r",
    # "deps install --no-conda"), so the raw tokens go straight to the handler.
    if argv and argv[0] in handlers:
        rest = argv[1:]
        if '-h' in rest or '--help' in rest:
            parser.parse_args([argv[0], '--help'])  # prints help, then exits
        handlers[argv[0]](rest)
        return

    # No valid subcommand: argparse prints help or a usage error and exits.
    parser.parse_args(argv)

if __name__ == "__main__":
    main()