Auto-update: Mon Feb 24 14:35:49 PST 2025

2025-02-24 14:35:49 -08:00 · 2025-02-24 14:35:49 -08:00 · b82bff7236
commit b82bff7236
parent ac3522607f
1 changed file with 26 additions and 1 deletion
--- a/27
+++ b/27
@@ -1,7 +1,32 @@
 #!/usr/bin/env python3
-import io
+"""
+jpgpdfocr - Convert JPG images to a searchable PDF using OCR.
+
+This script processes a directory of JPG images, runs OCR (Optical Character Recognition)
+on each image using Tesseract, and merges the resulting pages into a single searchable PDF.
+
+Usage:
+    ./jpgpdfocr --input <directory> [--output <file>] [--lang <language>] 
+                [--threads <num>] [--quiet]
+
+Arguments:
+    --input, -i    Directory containing JPG files (required).
+    --output, -o   Output PDF filename (default: <input_folder>_searchable.pdf).
+    --lang, -l     OCR language (default: 'eng').
+    --threads, -t  Number of threads for OCR (default: auto-detect CPU cores).
+    --quiet, -q    Suppress output messages.
+
+Dependencies:
+    - Python 3
+    - PIL (Pillow)
+    - pytesseract (Python wrapper; requires the Tesseract OCR engine to be installed)
+    - PyPDF2
+    - concurrent.futures (standard library)
+"""
+
 import os
 import argparse
+import io
 from PIL import Image
 import pytesseract
 import concurrent.futures