Index intro text before headings in org files

- Text before headings was not being indexed due to buggy orgnode
  parsing logic
- Fixed indexing of intro text in files both with and without
  headings
- Ensure intro text node has heading set to all title lines collected
  from the file

Resolves #165
This commit is contained in:
Debanjum Singh Solanky 2023-03-01 12:11:33 -06:00
parent ed177db2be
commit fe03ba3dce
3 changed files with 97 additions and 15 deletions

View file

@ -73,6 +73,7 @@ def makelist(filename):
level = ""
heading = ""
bodytext = ""
introtext = ""
tags = list() # set of all tags in headline
closed_date = ""
sched_date = ""
@ -133,7 +134,7 @@ def makelist(filename):
file_title += f" {title_text}"
continue
# Ignore Properties Drawers Completely
# Ignore Properties Drawer Start, End Lines
if re.search(":PROPERTIES:", line):
in_properties_drawer = True
continue
@ -190,20 +191,33 @@ def makelist(filename):
and not clocked_re
and line[:1] != "#"
):
bodytext = bodytext + line
# if we are in a heading
if heading:
# add the line to the bodytext
bodytext += line
# else we are in the pre heading portion of the file
elif line.strip():
# so add the line to the introtext
introtext += line
# write out last node
thisNode = Orgnode(level, heading or file_title, bodytext, tags)
thisNode.properties = property_map
if sched_date:
thisNode.scheduled = sched_date
if deadline_date:
thisNode.deadline = deadline_date
if closed_date:
thisNode.closed = closed_date
if logbook:
thisNode.logbook = logbook
nodelist.append(thisNode)
# write out intro node before headings
# this is done at the end to allow collating all title lines
if introtext:
thisNode = Orgnode(level, file_title, introtext, tags)
nodelist = [thisNode] + nodelist
# write out last heading node
if heading:
thisNode = Orgnode(level, heading, bodytext, tags)
thisNode.properties = property_map
if sched_date:
thisNode.scheduled = sched_date
if deadline_date:
thisNode.deadline = deadline_date
if closed_date:
thisNode.closed = closed_date
if logbook:
thisNode.logbook = logbook
nodelist.append(thisNode)
# using the list of TODO keywords found in the file
# process the headings searching for TODO keywords

View file

@ -108,6 +108,30 @@ def test_entry_with_body_to_jsonl(tmp_path):
assert len(jsonl_data) == 1
def test_file_with_entry_after_intro_text_to_jsonl(tmp_path):
    "Ensure intro text before any headings is indexed."
    # Arrange: an org file whose first heading is preceded by free text
    org_content = f"""
Intro text
* Entry Heading
entry body
"""
    org_path = create_file(tmp_path, org_content)

    # Act: parse the org file, turn its nodes into entries, then serialize to JSONL
    nodes, node_to_file = OrgToJsonl.extract_org_entries(org_files=[org_path])
    converted = OrgToJsonl.convert_org_nodes_to_entries(nodes, node_to_file)
    serialized = OrgToJsonl.convert_org_entries_to_jsonl(converted)
    records = list(map(json.loads, serialized.splitlines()))

    # Assert: two records are expected (intro text plus the heading entry)
    assert len(records) == 2
def test_file_with_no_headings_to_jsonl(tmp_path):
"Ensure files with no heading, only body text are loaded."
# Arrange

View file

@ -268,7 +268,7 @@ def test_parse_entry_with_multiple_titles_and_no_headings(tmp_path):
# Arrange
entry = f"""#+TITLE: title1
Body Line 1
#+TITLE: title2 """
#+TITLE: title2 """
orgfile = create_file(tmp_path, entry)
# Act
@ -286,6 +286,50 @@ Body Line 1
assert entries[0].deadline == ""
# ----------------------------------------------------------------------------------------------------
def test_parse_org_with_intro_text_before_heading(tmp_path):
    "Test parsing of org file with intro text before heading"
    # Arrange: a title line, intro text, then a single heading with a body
    org_content = f"""#+TITLE: Title
intro body
* Entry Heading
entry body
"""
    org_path = create_file(tmp_path, org_content)

    # Act
    nodes = orgnode.makelist(org_path)

    # Assert: the intro node comes first and is headed by the file title,
    # followed by the node for the actual heading
    assert len(nodes) == 2
    intro_node, heading_node = nodes[0], nodes[1]
    assert intro_node.heading == "Title"
    assert intro_node.body == "intro body\n"
    assert heading_node.heading == "Entry Heading"
    assert heading_node.body == "entry body\n"
# ----------------------------------------------------------------------------------------------------
def test_parse_org_with_intro_text_multiple_titles_and_heading(tmp_path):
    "Test parsing of org file with intro text, multiple titles and heading entry"
    # Arrange: title lines appear both before the intro text and after the heading's body
    org_content = f"""#+TITLE: Title1
intro body
* Entry Heading
entry body
#+TITLE: Title2 """
    org_path = create_file(tmp_path, org_content)

    # Act
    nodes = orgnode.makelist(org_path)

    # Assert: the intro node's heading joins every title line found in the file,
    # and the trailing title line is not part of the heading node's body
    assert len(nodes) == 2
    intro_node, heading_node = nodes[0], nodes[1]
    assert intro_node.heading == "Title1 Title2"
    assert intro_node.body == "intro body\n"
    assert heading_node.heading == "Entry Heading"
    assert heading_node.body == "entry body\n"
# Helper Functions
def create_file(tmp_path, entry, filename="test.org"):
org_file = tmp_path / f"notes/{filename}"