mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 15:38:55 +01:00
Fix PDFs unit test, skip OCR
This commit is contained in:
parent
59fec37943
commit
ad197be70c
1 changed file with 3 additions and 0 deletions
|
@@ -1,6 +1,8 @@
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
|
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
|
||||||
from khoj.utils.fs_syncer import get_pdf_files
|
from khoj.utils.fs_syncer import get_pdf_files
|
||||||
from khoj.utils.rawconfig import TextContentConfig
|
from khoj.utils.rawconfig import TextContentConfig
|
||||||
|
@@ -37,6 +39,7 @@ def test_multi_page_pdf_to_jsonl():
|
||||||
assert len(entries[1]) == 6
|
assert len(entries[1]) == 6
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="Temporarily disabled OCR due to performance issues")
|
||||||
def test_ocr_page_pdf_to_jsonl():
|
def test_ocr_page_pdf_to_jsonl():
|
||||||
"Convert multiple pages from single PDF file to jsonl."
|
"Convert multiple pages from single PDF file to jsonl."
|
||||||
# Arrange
|
# Arrange
|
||||||
|
|
Loading…
Reference in a new issue