mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 09:25:06 +01:00
Make RapidOCR dependency optional as its requirements are flaky
RapidOCR depends on OpenCV, which by default requires a bunch of GUI parameters. This system package dependency set (like libgl1) is flaky. Making the RapidOCR dependency optional should allow khoj to be more resilient to setup/dependency failures. The trade-off is that OCR for documents may not always be available, and it'll require looking at server logs to find out when this happens.
This commit is contained in:
parent
0a568244fd
commit
077b88bafa
2 changed files with 7 additions and 5 deletions
|
@ -73,7 +73,7 @@ dependencies = [
|
|||
"psycopg2-binary == 2.9.9",
|
||||
"lxml == 4.9.3",
|
||||
"tzdata == 2023.3",
|
||||
"rapidocr-onnxruntime == 1.3.22",
|
||||
"rapidocr-onnxruntime == 1.3.24",
|
||||
"openai-whisper >= 20231117",
|
||||
"django-phonenumber-field == 7.3.0",
|
||||
"phonenumbers == 8.13.27",
|
||||
|
|
|
@ -4,8 +4,6 @@ import os
|
|||
from datetime import datetime
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
from rapidocr_onnxruntime import RapidOCR
|
||||
|
||||
from khoj.database.models import Entry as DbEntry
|
||||
from khoj.database.models import KhojUser
|
||||
from khoj.processor.content.text_to_entries import TextToEntries
|
||||
|
@ -58,7 +56,6 @@ class ImageToEntries(TextToEntries):
|
|||
entry_to_location_map: List[Tuple[str, str]] = []
|
||||
for image_file in image_files:
|
||||
try:
|
||||
loader = RapidOCR()
|
||||
bytes = image_files[image_file]
|
||||
# write the image to a temporary file
|
||||
timestamp_now = datetime.utcnow().timestamp()
|
||||
|
@ -71,13 +68,18 @@ class ImageToEntries(TextToEntries):
|
|||
bytes = image_files[image_file]
|
||||
f.write(bytes)
|
||||
try:
|
||||
from rapidocr_onnxruntime import RapidOCR
|
||||
|
||||
loader = RapidOCR()
|
||||
image_entries_per_file = ""
|
||||
result, _ = loader(tmp_file)
|
||||
if result:
|
||||
expanded_entries = [text[1] for text in result]
|
||||
image_entries_per_file = " ".join(expanded_entries)
|
||||
except ImportError:
|
||||
logger.warning(f"Unable to process file: {image_file}. This file will not be indexed.")
|
||||
logger.warning(
|
||||
f"Unable to process image or scanned file for text: {image_file}. This file will not be indexed."
|
||||
)
|
||||
continue
|
||||
entry_to_location_map.append((image_entries_per_file, image_file))
|
||||
entries.extend([image_entries_per_file])
|
||||
|
|
Loading…
Reference in a new issue