Fix PDFs unit test, skip OCR

This commit is contained in:
sabaimran 2024-10-20 22:25:41 -07:00
parent 59fec37943
commit ad197be70c

View file

@@ -1,6 +1,8 @@
import os
import re
import pytest
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
from khoj.utils.fs_syncer import get_pdf_files
from khoj.utils.rawconfig import TextContentConfig
@@ -37,6 +39,7 @@ def test_multi_page_pdf_to_jsonl():
assert len(entries[1]) == 6
@pytest.mark.skip(reason="Temporarily disabled OCR due to performance issues")
def test_ocr_page_pdf_to_jsonl():
"Convert multiple pages from single PDF file to jsonl."
# Arrange