mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 15:38:55 +01:00
Try adding dependencies for libgl in order to run OCR in GitHub Actions unit tests
This commit is contained in:
parent
5f1e37fff0
commit
3d6e8d53fe
3 changed files with 18 additions and 1 deletion
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
|
@ -61,7 +61,7 @@ jobs:
|
||||||
env:
|
env:
|
||||||
DEBIAN_FRONTEND: noninteractive
|
DEBIAN_FRONTEND: noninteractive
|
||||||
run: |
|
run: |
|
||||||
apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0
|
apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
|
||||||
|
|
||||||
- name: ⬇️ Install Postgres
|
- name: ⬇️ Install Postgres
|
||||||
env:
|
env:
|
||||||
|
|
BIN
tests/data/pdf/ocr_samples.pdf
vendored
Normal file
BIN
tests/data/pdf/ocr_samples.pdf
vendored
Normal file
Binary file not shown.
|
@ -50,6 +50,23 @@ def test_multi_page_pdf_to_jsonl():
|
||||||
assert len(jsonl_data) == 6
|
assert len(jsonl_data) == 6
|
||||||
|
|
||||||
|
|
||||||
|
def test_ocr_page_pdf_to_jsonl():
|
||||||
|
"Convert a PDF file whose text must be extracted via OCR to jsonl."
|
||||||
|
# Act
|
||||||
|
# Extract Entries from specified Pdf files
|
||||||
|
with open("tests/data/pdf/ocr_samples.pdf", "rb") as f:
|
||||||
|
pdf_bytes = f.read()
|
||||||
|
|
||||||
|
data = {"tests/data/pdf/ocr_samples.pdf": pdf_bytes}
|
||||||
|
entries, entry_to_file_map = PdfToEntries.extract_pdf_entries(pdf_files=data)
|
||||||
|
|
||||||
|
# Process Each Entry from All Pdf Files
|
||||||
|
entries = PdfToEntries.convert_pdf_entries_to_maps(entries, entry_to_file_map)
|
||||||
|
|
||||||
|
assert len(entries) == 1
|
||||||
|
assert "playing on a strip of marsh" in entries[0].raw
|
||||||
|
|
||||||
|
|
||||||
def test_get_pdf_files(tmp_path):
|
def test_get_pdf_files(tmp_path):
|
||||||
"Ensure Pdf files specified via input-filter, input-files extracted"
|
"Ensure Pdf files specified via input-filter, input-files extracted"
|
||||||
# Arrange
|
# Arrange
|
||||||
|
|
Loading…
Reference in a new issue