mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
8eccd8a5e4
- Added support for uploading .jpeg, .jpg, and .png files to Khoj from Web, Desktop app - Updating indexer to generate raw text and entries using RapidOCR - Details * added support for indexing images via ocr * fixed pyproject.toml * Update src/khoj/processor/content/images/image_to_entries.py Co-authored-by: Debanjum <debanjum@gmail.com> * Update src/khoj/processor/content/images/image_to_entries.py Co-authored-by: Debanjum <debanjum@gmail.com> * removed redundant try except blocks * updated desktop js file to support image formats * added tests for jpg and png * Fix processing for image to entries files * Update unit tests with working image indexer * Change png test from version verification to open-cv verification --------- Co-authored-by: Debanjum <debanjum@gmail.com> Co-authored-by: sabaimran <narmiabas@gmail.com>
21 lines
723 B
Python
21 lines
723 B
Python
import os
|
|
|
|
from khoj.processor.content.images.image_to_entries import ImageToEntries
|
|
|
|
|
|
def test_png_to_jsonl():
    """Extract entries from the sample PNG and check the recognised text.

    The fixture is read as raw bytes and handed to
    ``ImageToEntries.extract_image_entries`` keyed by its path. The result
    must hold two items, and the first element of the second item must expose
    ``raw`` text containing "opencv-python".
    """
    # Arrange: map the fixture path to its binary content.
    png_path = "tests/data/images/testocr.png"
    with open(png_path, "rb") as png_file:
        image_files = {png_path: png_file.read()}

    # Act
    entries = ImageToEntries.extract_image_entries(image_files=image_files)

    # Assert
    assert len(entries) == 2
    assert "opencv-python" in entries[1][0].raw
|
|
|
|
|
|
def test_jpg_to_jsonl():
    """Extract entries from the sample JPG and check the recognised text.

    The fixture is read as raw bytes and handed to
    ``ImageToEntries.extract_image_entries`` keyed by its path. The result
    must hold two items, and the first element of the second item must expose
    ``raw`` text containing "investments".
    """
    # Arrange: map the fixture path to its binary content.
    jpg_path = "tests/data/images/nasdaq.jpg"
    with open(jpg_path, "rb") as jpg_file:
        image_files = {jpg_path: jpg_file.read()}

    # Act
    entries = ImageToEntries.extract_image_entries(image_files=image_files)

    # Assert
    assert len(entries) == 2
    assert "investments" in entries[1][0].raw
|