From d3267554ae7a7190dba7ebc471e27a23df90bf9a Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Sat, 10 Sep 2022 23:57:17 +0300
Subject: [PATCH] Add basic tests for markdown to jsonl conversion

---
 tests/test_markdown_to_jsonl.py | 82 +++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 tests/test_markdown_to_jsonl.py

diff --git a/tests/test_markdown_to_jsonl.py b/tests/test_markdown_to_jsonl.py
new file mode 100644
index 00000000..712053e8
--- /dev/null
+++ b/tests/test_markdown_to_jsonl.py
@@ -0,0 +1,82 @@
+# Standard Packages
+import json
+
+# Internal Packages
+from src.processor.markdown.markdown_to_jsonl import extract_markdown_entries, convert_markdown_maps_to_jsonl, convert_markdown_entries_to_maps
+
+
+def test_markdown_file_with_no_headings_to_jsonl(tmp_path):
+    "Convert files with no heading to jsonl."
+    # Arrange
+    entry = '''
+    - Bullet point 1
+    - Bullet point 2
+    '''
+    markdownfile = create_file(tmp_path, entry)
+
+    # Act
+    # Extract Entries from specified Markdown files
+    entries, entry_to_file_map = extract_markdown_entries(markdown_files=[markdownfile])
+
+    # Process Each Entry from All Notes Files
+    jsonl_string = convert_markdown_maps_to_jsonl(convert_markdown_entries_to_maps(entries, entry_to_file_map))
+    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
+
+    # Assert
+    assert len(jsonl_data) == 1
+
+
+def test_single_markdown_entry_to_jsonl(tmp_path):
+    "Convert markdown entry from single file to jsonl."
+    # Arrange
+    entry = '''### Heading
+    \t\r
+    Body Line 1
+    '''
+    markdownfile = create_file(tmp_path, entry)
+
+    # Act
+    # Extract Entries from specified Markdown files
+    entries, entry_to_file_map = extract_markdown_entries(markdown_files=[markdownfile])
+
+    # Process Each Entry from All Notes Files
+    jsonl_string = convert_markdown_maps_to_jsonl(convert_markdown_entries_to_maps(entries, entry_to_file_map))
+    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
+
+    # Assert
+    assert len(jsonl_data) == 1
+
+
+def test_multiple_markdown_entries_to_jsonl(tmp_path):
+    "Convert multiple markdown entries from single file to jsonl."
+    # Arrange
+    entry = '''
+### Heading 1
+    \t\r
+    Heading 1 Body Line 1
+### Heading 2
+    \t\r
+    Heading 2 Body Line 2
+    '''
+    markdownfile = create_file(tmp_path, entry)
+
+    # Act
+    # Extract Entries from specified Markdown files
+    entries, entry_to_file_map = extract_markdown_entries(markdown_files=[markdownfile])
+
+    # Process Each Entry from All Notes Files
+    jsonl_string = convert_markdown_maps_to_jsonl(convert_markdown_entries_to_maps(entries, entry_to_file_map))
+    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
+
+    # Assert
+    assert len(jsonl_data) == 2
+
+
+# Helper Functions
+def create_file(tmp_path, entry, filename="test.md"):
+    "Write entry to a file named filename in tmp_path's notes directory and return its path."
+    markdown_file = tmp_path / f"notes/{filename}"
+    # Tolerate an existing notes directory so one test can create several files
+    markdown_file.parent.mkdir(parents=True, exist_ok=True)
+    markdown_file.write_text(entry)
+    return markdown_file