# Standard Packages
import json

# Internal Packages
from src.processor.markdown.markdown_to_jsonl import (
    extract_markdown_entries,
    convert_markdown_maps_to_jsonl,
    convert_markdown_entries_to_maps,
)


def test_markdown_file_with_no_headings_to_jsonl(tmp_path):
    """Convert a markdown file with no headings to jsonl.

    The file holds only bullet points; the test expects exactly one
    JSONL record to be produced for it.
    """
    # Arrange
    # NOTE: the \t and \r escapes used in these fixtures are real control
    # characters, so the literals are deliberately not raw strings.
    entry = '''
    - Bullet point 1
    - Bullet point 2
    '''
    markdownfile = create_file(tmp_path, entry)

    # Act
    # Extract entries from the specified markdown files
    entries, entry_to_file_map = extract_markdown_entries(markdown_files=[markdownfile])

    # Convert the extracted entries to maps, then serialize them as JSONL
    jsonl_string = convert_markdown_maps_to_jsonl(convert_markdown_entries_to_maps(entries, entry_to_file_map))
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

    # Assert
    assert len(jsonl_data) == 1


def test_single_markdown_entry_to_jsonl(tmp_path):
    """Convert a single markdown entry from a single file to jsonl.

    The file holds one heading followed by a whitespace-only line and one
    body line; the test expects exactly one JSONL record.
    """
    # Arrange
    entry = '''### Heading
    \t\r
    Body Line 1
    '''
    markdownfile = create_file(tmp_path, entry)

    # Act
    # Extract entries from the specified markdown files
    entries, entry_to_file_map = extract_markdown_entries(markdown_files=[markdownfile])

    # Convert the extracted entries to maps, then serialize them as JSONL
    jsonl_string = convert_markdown_maps_to_jsonl(convert_markdown_entries_to_maps(entries, entry_to_file_map))
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

    # Assert
    assert len(jsonl_data) == 1


def test_multiple_markdown_entries_to_jsonl(tmp_path):
    """Convert multiple markdown entries from a single file to jsonl.

    The file holds two headings, each with its own body; the test expects
    exactly two JSONL records.
    """
    # Arrange
    entry = '''
### Heading 1
    \t\r
    Heading 1 Body Line 1
### Heading 2
    \t\r
    Heading 2 Body Line 2
    '''
    markdownfile = create_file(tmp_path, entry)

    # Act
    # Extract entries from the specified markdown files
    entries, entry_to_file_map = extract_markdown_entries(markdown_files=[markdownfile])

    # Convert the extracted entries to maps, then serialize them as JSONL
    jsonl_string = convert_markdown_maps_to_jsonl(convert_markdown_entries_to_maps(entries, entry_to_file_map))
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

    # Assert
    assert len(jsonl_data) == 2


# Helper Functions
def create_file(tmp_path, entry, filename="test.md"):
    """Write `entry` to `<tmp_path>/notes/<filename>` and return its path.

    Args:
        tmp_path: Base directory (a path object supporting `/`), e.g. the
            pytest `tmp_path` fixture.
        entry: Text content to write to the file.
        filename: Name of the file created inside the `notes` directory.

    Returns:
        Path of the written markdown file.
    """
    markdown_file = tmp_path / f"notes/{filename}"
    # exist_ok lets the helper be called more than once for the same
    # tmp_path (e.g. to create several files) without FileExistsError.
    markdown_file.parent.mkdir(parents=True, exist_ok=True)
    # write_text creates the file itself, so no separate touch() is needed.
    markdown_file.write_text(entry)
    return markdown_file