Fix logic to ignore notes with no body. Add tests to prevent regression

- Notes whose body contained only newlines or other whitespace were not being ignored
- Add regression tests for the org_to_jsonl conversion to guard against this
This commit is contained in:
Debanjum Singh Solanky 2022-08-21 19:41:40 +03:00
parent 5e107eedc0
commit ea4fdd9134
2 changed files with 65 additions and 2 deletions

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
# Standard Packages
import re
import json
import argparse
import pathlib
@ -71,14 +72,14 @@ def extract_org_entries(org_files):
return entries
def convert_org_entries_to_jsonl(entries, verbose=0):
def convert_org_entries_to_jsonl(entries, verbose=0) -> str:
"Convert each Org-Mode entries to JSON and collate as JSONL"
jsonl = ''
for entry in entries:
entry_dict = dict()
# Ignore title notes i.e notes with just headings and empty body
if not entry.Body() or entry.Body().strip(empty_escape_sequences) == "":
if not entry.Body() or re.sub(r'\n|\t|\r| ', '', entry.Body()) == "":
continue
entry_dict["compiled"] = f'{entry.Heading()}.'

View file

@ -0,0 +1,62 @@
# Standard Packages
import json
from posixpath import split
# Internal Packages
from src.processor.org_mode.org_to_jsonl import convert_org_entries_to_jsonl, extract_org_entries
from src.utils.helpers import is_none_or_empty
def test_entry_with_empty_body_line_to_jsonl(tmp_path):
    '''Entries whose body holds nothing but whitespace must be skipped.

    The fixture is a heading followed by a property drawer and a line made up
    only of tab / carriage-return / newline characters. The conversion is
    expected to treat that as an empty body and emit no JSONL for it.'''
    # Arrange: heading + property drawer + whitespace-only body line
    org_text = '''*** Heading
:PROPERTIES:
:ID: 42-42-42
:END:
\t\r\n
'''
    org_path = create_file(tmp_path, org_text)

    # Act: parse the Org file, then convert the parsed entries to JSONL
    parsed_entries = extract_org_entries(org_files=[org_path])
    jsonl_output = convert_org_entries_to_jsonl(parsed_entries)

    # Assert: nothing was emitted for the body-less entry
    assert is_none_or_empty(jsonl_output)
def test_entry_with_body_to_jsonl(tmp_path):
    '''Entries that carry real body text must be converted.

    The fixture body starts with whitespace control characters but also
    contains the text "Body Line 1", so exactly one JSONL record is expected.'''
    # Arrange: heading + property drawer + body with leading whitespace
    org_text = '''*** Heading
:PROPERTIES:
:ID: 42-42-42
:END:
\t\r\nBody Line 1\n
'''
    org_path = create_file(tmp_path, org_text)

    # Act: parse the Org file, convert to JSONL, then decode each line
    parsed_entries = extract_org_entries(org_files=[org_path])
    jsonl_output = convert_org_entries_to_jsonl(parsed_entries)
    records = list(map(json.loads, jsonl_output.splitlines()))

    # Assert: the single entry survived the conversion
    assert len(records) == 1
# Helper Functions
def create_file(tmp_path, entry, filename="test.org"):
    """Write `entry` to `notes/<filename>` under `tmp_path` and return the path.

    Args:
        tmp_path: Base directory as a path object supporting the `/` operator
            (the tests pass their `tmp_path` argument).
        entry: Text content to write into the file.
        filename: Name of the file to create inside the `notes` directory.

    Returns:
        Path of the file that was written.
    """
    org_file = tmp_path / f"notes/{filename}"
    # exist_ok lets one test create several files under the same base
    # directory; parents covers a filename that includes sub-directories.
    org_file.parent.mkdir(parents=True, exist_ok=True)
    # write_text creates the file itself, so no separate touch() is needed.
    org_file.write_text(entry)
    return org_file