diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 84fbb1aa..697579da 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -61,7 +61,7 @@ jobs:
         env:
           DEBIAN_FRONTEND: noninteractive
         run: |
-          apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0
+          apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
 
       - name: ⬇️ Install Postgres
         env:
diff --git a/tests/data/pdf/ocr_samples.pdf b/tests/data/pdf/ocr_samples.pdf
new file mode 100644
index 00000000..100f60e0
Binary files /dev/null and b/tests/data/pdf/ocr_samples.pdf differ
diff --git a/tests/test_pdf_to_entries.py b/tests/test_pdf_to_entries.py
index 81ea18c8..3ab44639 100644
--- a/tests/test_pdf_to_entries.py
+++ b/tests/test_pdf_to_entries.py
@@ -50,6 +50,23 @@ def test_multi_page_pdf_to_jsonl():
     assert len(jsonl_data) == 6
 
 
+def test_ocr_page_pdf_to_jsonl():
+    "Extract entries from the OCR sample PDF and check the expected text is present."
+    # Act
+    # Read the OCR sample PDF as raw bytes, keyed by its file path
+    with open("tests/data/pdf/ocr_samples.pdf", "rb") as f:
+        pdf_bytes = f.read()
+
+    data = {"tests/data/pdf/ocr_samples.pdf": pdf_bytes}
+    entries, entry_to_file_map = PdfToEntries.extract_pdf_entries(pdf_files=data)
+
+    # Process Each Entry from All Pdf Files
+    entries = PdfToEntries.convert_pdf_entries_to_maps(entries, entry_to_file_map)
+
+    assert len(entries) == 1
+    assert "playing on a strip of marsh" in entries[0].raw
+
+
 def test_get_pdf_files(tmp_path):
     "Ensure Pdf files specified via input-filter, input-files extracted"
     # Arrange