Allow Indexing Heading Entries. Improve Org, TextToJsonl Parser

### Summary
- Set `index_heading_entries` field in `~/.khoj/khoj.yml` to `true` to index entries with empty body

### Main Changes
#### Make Indexing Org-Mode Entries with Empty Body Configurable
- 253c9ea Set `index_heading_entries` field in `khoj.yml` to index entries with no body

### Fix, Improve OrgNode, TextToJsonl Parser
- 9d369ae Fix `OrgNode` render of entries with property drawers and empty body
- 1d3b3d5 Convert field get/set methods in `OrgNode` class to `@property`
- db37e38 Create `OrgNode` `hasBody` method. Use it in `org_to_jsonl` checks
- b4878d7 Extract entries from scratch when regenerate requested
- 52e3dd9 Pass the whole `TextContentConfig` as argument to `text_to_jsonl` methods
- e951ba3 Raise exception when org file not found

Resolves #87
This commit is contained in:
Debanjum 2022-09-11 13:46:11 +00:00 committed by GitHub
commit 182fbbd8df
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 308 additions and 219 deletions

View file

@ -13,13 +13,17 @@ import time
from src.utils.helpers import get_absolute_path, is_none_or_empty, mark_entries_for_update
from src.utils.constants import empty_escape_sequences
from src.utils.jsonl import dump_jsonl, compress_jsonl_data
from src.utils.rawconfig import TextContentConfig
logger = logging.getLogger(__name__)
# Define Functions
def beancount_to_jsonl(beancount_files, beancount_file_filter, output_file, previous_entries=None):
def beancount_to_jsonl(config: TextContentConfig, previous_entries=None):
# Extract required fields from config
beancount_files, beancount_file_filter, output_file = config.input_files, config.input_filter, config.compressed_jsonl
# Input Validation
if is_none_or_empty(beancount_files) and is_none_or_empty(beancount_file_filter):
print("At least one of beancount-files or beancount-file-filter is required to be specified")

View file

@ -13,13 +13,17 @@ import time
from src.utils.helpers import get_absolute_path, is_none_or_empty, mark_entries_for_update
from src.utils.constants import empty_escape_sequences
from src.utils.jsonl import dump_jsonl, compress_jsonl_data
from src.utils.rawconfig import TextContentConfig
logger = logging.getLogger(__name__)
# Define Functions
def markdown_to_jsonl(markdown_files, markdown_file_filter, output_file, previous_entries=None):
def markdown_to_jsonl(config: TextContentConfig, previous_entries=None):
# Extract required fields from config
markdown_files, markdown_file_filter, output_file = config.input_files, config.input_filter, config.compressed_jsonl
# Input Validation
if is_none_or_empty(markdown_files) and is_none_or_empty(markdown_file_filter):
print("At least one of markdown-files or markdown-file-filter is required to be specified")

View file

@ -14,13 +14,18 @@ from src.processor.org_mode import orgnode
from src.utils.helpers import get_absolute_path, is_none_or_empty, mark_entries_for_update
from src.utils.jsonl import dump_jsonl, compress_jsonl_data
from src.utils import state
from src.utils.rawconfig import TextContentConfig
logger = logging.getLogger(__name__)
# Define Functions
def org_to_jsonl(org_files, org_file_filter, output_file, previous_entries=None):
def org_to_jsonl(config: TextContentConfig, previous_entries=None):
# Extract required fields from config
org_files, org_file_filter, output_file = config.input_files, config.input_filter, config.compressed_jsonl
index_heading_entries = config.index_heading_entries
# Input Validation
if is_none_or_empty(org_files) and is_none_or_empty(org_file_filter):
print("At least one of org-files or org-file-filter is required to be specified")
@ -37,7 +42,7 @@ def org_to_jsonl(org_files, org_file_filter, output_file, previous_entries=None)
logger.debug(f"Parse entries from org files into OrgNode objects: {end - start} seconds")
start = time.time()
current_entries = convert_org_nodes_to_entries(entry_nodes, file_to_entries)
current_entries = convert_org_nodes_to_entries(entry_nodes, file_to_entries, index_heading_entries)
end = time.time()
logger.debug(f"Convert OrgNodes into entry dictionaries: {end - start} seconds")
@ -96,40 +101,40 @@ def extract_org_entries(org_files):
return entries, dict(entry_to_file_map)
def convert_org_nodes_to_entries(entries: list[orgnode.Orgnode], entry_to_file_map) -> list[dict]:
def convert_org_nodes_to_entries(entries: list[orgnode.Orgnode], entry_to_file_map, index_heading_entries=False) -> list[dict]:
"Convert Org-Mode entries into list of dictionary"
entry_maps = []
for entry in entries:
entry_dict = dict()
# Ignore title notes i.e notes with just headings and empty body
if not entry.Body() or re.sub(r'\n|\t|\r| ', '', entry.Body()) == "":
if not entry.hasBody and not index_heading_entries:
# Ignore title notes i.e notes with just headings and empty body
continue
entry_dict["compiled"] = f'{entry.Heading()}.'
entry_dict["compiled"] = f'{entry.heading}.'
if state.verbose > 2:
logger.debug(f"Title: {entry.Heading()}")
logger.debug(f"Title: {entry.heading}")
if entry.Tags():
tags_str = " ".join(entry.Tags())
if entry.tags:
tags_str = " ".join(entry.tags)
entry_dict["compiled"] += f'\t {tags_str}.'
if state.verbose > 2:
logger.debug(f"Tags: {tags_str}")
if entry.Closed():
entry_dict["compiled"] += f'\n Closed on {entry.Closed().strftime("%Y-%m-%d")}.'
if entry.closed:
entry_dict["compiled"] += f'\n Closed on {entry.closed.strftime("%Y-%m-%d")}.'
if state.verbose > 2:
logger.debug(f'Closed: {entry.Closed().strftime("%Y-%m-%d")}')
logger.debug(f'Closed: {entry.closed.strftime("%Y-%m-%d")}')
if entry.Scheduled():
entry_dict["compiled"] += f'\n Scheduled for {entry.Scheduled().strftime("%Y-%m-%d")}.'
if entry.scheduled:
entry_dict["compiled"] += f'\n Scheduled for {entry.scheduled.strftime("%Y-%m-%d")}.'
if state.verbose > 2:
logger.debug(f'Scheduled: {entry.Scheduled().strftime("%Y-%m-%d")}')
logger.debug(f'Scheduled: {entry.scheduled.strftime("%Y-%m-%d")}')
if entry.Body():
entry_dict["compiled"] += f'\n {entry.Body()}'
if entry.hasBody:
entry_dict["compiled"] += f'\n {entry.body}'
if state.verbose > 2:
logger.debug(f"Body: {entry.Body()}")
logger.debug(f"Body: {entry.body}")
if entry_dict:
entry_dict["raw"] = f'{entry}'

View file

@ -33,12 +33,12 @@ headline and associated text from an org-mode file, and routines for
constructing data structures of these classes.
"""
import re, sys
import re
import datetime
from pathlib import Path
from os.path import relpath
indent_regex = re.compile(r'^\s*')
indent_regex = re.compile(r'^ *')
def normalize_filename(filename):
"Normalize and escape filename for rendering"
@ -57,12 +57,7 @@ def makelist(filename):
"""
ctr = 0
try:
f = open(filename, 'r')
except IOError:
print(f"Unable to open file {filename}")
print("Program terminating.")
sys.exit(1)
f = open(filename, 'r')
todos = { "TODO": "", "WAITING": "", "ACTIVE": "",
"DONE": "", "CANCELLED": "", "FAILED": ""} # populated from #+SEQ_TODO line
@ -74,41 +69,41 @@ def makelist(filename):
sched_date = ''
deadline_date = ''
logbook = list()
nodelist = []
propdict = dict()
nodelist: list[Orgnode] = list()
property_map = dict()
in_properties_drawer = False
in_logbook_drawer = False
file_title = f'{filename}'
for line in f:
ctr += 1
hdng = re.search(r'^(\*+)\s(.*?)\s*$', line)
if hdng: # we are processing a heading line
heading_search = re.search(r'^(\*+)\s(.*?)\s*$', line)
if heading_search: # we are processing a heading line
if heading: # if we are on second heading, append first heading to headings list
thisNode = Orgnode(level, heading, bodytext, tags)
if closed_date:
thisNode.setClosed(closed_date)
thisNode.closed = closed_date
closed_date = ''
if sched_date:
thisNode.setScheduled(sched_date)
thisNode.scheduled = sched_date
sched_date = ""
if deadline_date:
thisNode.setDeadline(deadline_date)
thisNode.deadline = deadline_date
deadline_date = ''
if logbook:
thisNode.setLogbook(logbook)
thisNode.logbook = logbook
logbook = list()
thisNode.setProperties(propdict)
thisNode.properties = property_map
nodelist.append( thisNode )
propdict = {'LINE': f'file:{normalize_filename(filename)}::{ctr}'}
level = hdng.group(1)
heading = hdng.group(2)
property_map = {'LINE': f'file:{normalize_filename(filename)}::{ctr}'}
level = heading_search.group(1)
heading = heading_search.group(2)
bodytext = ""
tags = list() # set of all tags in headline
tagsrch = re.search(r'(.*?)\s*:([a-zA-Z0-9].*?):$',heading)
if tagsrch:
heading = tagsrch.group(1)
parsedtags = tagsrch.group(2)
tag_search = re.search(r'(.*?)\s*:([a-zA-Z0-9].*?):$',heading)
if tag_search:
heading = tag_search.group(1)
parsedtags = tag_search.group(2)
if parsedtags:
for parsedtag in parsedtags.split(':'):
if parsedtag != '': tags.append(parsedtag)
@ -153,13 +148,13 @@ def makelist(filename):
logbook += [(clocked_in, clocked_out)]
line = ""
prop_srch = re.search(r'^\s*:([a-zA-Z0-9]+):\s*(.*?)\s*$', line)
if prop_srch:
property_search = re.search(r'^\s*:([a-zA-Z0-9]+):\s*(.*?)\s*$', line)
if property_search:
# Set ID property to an id based org-mode link to the entry
if prop_srch.group(1) == 'ID':
propdict['ID'] = f'id:{prop_srch.group(2)}'
if property_search.group(1) == 'ID':
property_map['ID'] = f'id:{property_search.group(2)}'
else:
propdict[prop_srch.group(1)] = prop_srch.group(2)
property_map[property_search.group(1)] = property_search.group(2)
continue
cd_re = re.search(r'CLOSED:\s*\[([0-9]{4})-([0-9]{2})-([0-9]{2})', line)
@ -184,39 +179,38 @@ def makelist(filename):
# write out last node
thisNode = Orgnode(level, heading or file_title, bodytext, tags)
thisNode.setProperties(propdict)
thisNode.properties = property_map
if sched_date:
thisNode.setScheduled(sched_date)
thisNode.scheduled = sched_date
if deadline_date:
thisNode.setDeadline(deadline_date)
thisNode.deadline = deadline_date
if closed_date:
thisNode.setClosed(closed_date)
thisNode.closed = closed_date
if logbook:
thisNode.setLogbook(logbook)
thisNode.logbook = logbook
nodelist.append( thisNode )
# using the list of TODO keywords found in the file
# process the headings searching for TODO keywords
for n in nodelist:
h = n.Heading()
todoSrch = re.search(r'([A-Z]+)\s(.*?)$', h)
if todoSrch:
if todoSrch.group(1) in todos:
n.setHeading( todoSrch.group(2) )
n.setTodo ( todoSrch.group(1) )
todo_search = re.search(r'([A-Z]+)\s(.*?)$', n.heading)
if todo_search:
if todo_search.group(1) in todos:
n.heading = todo_search.group(2)
n.todo = todo_search.group(1)
# extract, set priority from heading, update heading if necessary
prtysrch = re.search(r'^\[\#(A|B|C)\] (.*?)$', n.Heading())
if prtysrch:
n.setPriority(prtysrch.group(1))
n.setHeading(prtysrch.group(2))
priority_search = re.search(r'^\[\#(A|B|C)\] (.*?)$', n.heading)
if priority_search:
n.priority = priority_search.group(1)
n.heading = priority_search.group(2)
# Set SOURCE property to a file+heading based org-mode link to the entry
if n.Level() == 0:
if n.level == 0:
n.properties['LINE'] = f'file:{normalize_filename(filename)}::0'
n.properties['SOURCE'] = f'[[file:{normalize_filename(filename)}]]'
else:
escaped_heading = n.Heading().replace("[","\\[").replace("]","\\]")
escaped_heading = n.heading.replace("[","\\[").replace("]","\\]")
n.properties['SOURCE'] = f'[[file:{normalize_filename(filename)}::*{escaped_heading}]]'
return nodelist
@ -234,199 +228,235 @@ class Orgnode(object):
first tag. The makelist routine postprocesses the list to
identify TODO tags and updates headline and todo fields.
"""
self.level = len(level)
self.headline = headline
self.body = body
self.tags = tags # All tags in the headline
self.todo = ""
self.prty = "" # empty of A, B or C
self.scheduled = "" # Scheduled date
self.deadline = "" # Deadline date
self.closed = "" # Closed date
self.properties = dict()
self.logbook = list() # List of clock-in, clock-out tuples representing logbook entries
self._level = len(level)
self._heading = headline
self._body = body
self._tags = tags # All tags in the headline
self._todo = ""
self._priority = "" # empty of A, B or C
self._scheduled = "" # Scheduled date
self._deadline = "" # Deadline date
self._closed = "" # Closed date
self._properties = dict()
self._logbook = list() # List of clock-in, clock-out tuples representing logbook entries
# Look for priority in headline and transfer to prty field
def Heading(self):
@property
def heading(self):
"""
Return the Heading text of the node without the TODO tag
"""
return self.headline
return self._heading
def setHeading(self, newhdng):
@heading.setter
def heading(self, newhdng):
"""
Change the heading to the supplied string
"""
self.headline = newhdng
self._heading = newhdng
def Body(self):
@property
def body(self):
"""
Returns all lines of text of the body of this node except the
Property Drawer
"""
return self.body
return self._body
def Level(self):
@property
def hasBody(self):
"""
Returns True if node has non empty body, else False
"""
return self._body and re.sub(r'\n|\t|\r| ', '', self._body) != ''
@property
def level(self):
"""
Returns an integer corresponding to the level of the node.
Top level (one asterisk) has a level of 1.
"""
return self.level
return self._level
def Priority(self):
@property
def priority(self):
"""
Returns the priority of this headline: 'A', 'B', 'C' or empty
string if priority has not been set.
"""
return self.prty
return self._priority
def setPriority(self, newprty):
@priority.setter
def priority(self, new_priority):
"""
Change the value of the priority of this headline.
Valid values are '', 'A', 'B', 'C'
"""
self.prty = newprty
self._priority = new_priority
def Tags(self):
@property
def tags(self):
"""
Returns the list of all tags
For example, :HOME:COMPUTER: would return ['HOME', 'COMPUTER']
"""
return self.tags
return self._tags
def hasTag(self, srch):
@property
def hasTag(self, tag):
"""
Returns True if the supplied tag is present in this headline
For example, hasTag('COMPUTER') on headline containing
:HOME:COMPUTER: would return True.
"""
return srch in self.tags
return tag in self._tags
def setTags(self, newtags):
@tags.setter
def tags(self, newtags):
"""
Store all the tags found in the headline.
"""
self.tags = newtags
self._tags = newtags
def Todo(self):
@property
def todo(self):
"""
Return the value of the TODO tag
"""
return self.todo
return self._todo
def setTodo(self, value):
@todo.setter
def todo(self, new_todo):
"""
Set the value of the TODO tag to the supplied string
"""
self.todo = value
self._todo = new_todo
def setProperties(self, dictval):
@property
def properties(self):
"""
Return the dictionary of properties
"""
return self._properties
@properties.setter
def properties(self, new_properties):
"""
Sets all properties using the supplied dictionary of
name/value pairs
"""
self.properties = dictval
self._properties = new_properties
def Property(self, keyval):
def Property(self, property_key):
"""
Returns the value of the requested property or an empty string if the
property does not exist.
"""
return self.properties.get(keyval, "")
return self._properties.get(property_key, "")
def setScheduled(self, dateval):
@property
def scheduled(self):
"""
Set the scheduled date using the supplied date object
Return the scheduled date
"""
self.scheduled = dateval
return self._scheduled
def Scheduled(self):
@scheduled.setter
def scheduled(self, new_scheduled):
"""
Return the scheduled date object or null if nonexistent
Set the scheduled date to the new scheduled date
"""
return self.scheduled
self._scheduled = new_scheduled
def setDeadline(self, dateval):
@property
def deadline(self):
"""
Set the deadline (due) date using the supplied date object
Return the deadline date
"""
self.deadline = dateval
return self._deadline
def Deadline(self):
@deadline.setter
def deadline(self, new_deadline):
"""
Return the deadline date object or null if nonexistent
Set the deadline (due) date to the new deadline date
"""
return self.deadline
self._deadline = new_deadline
def setClosed(self, dateval):
@property
def closed(self):
"""
Set the closed date using the supplied date object
Return the closed date
"""
self.closed = dateval
return self._closed
def Closed(self):
@closed.setter
def closed(self, new_closed):
"""
Return the closed date object or null if nonexistent
Set the closed date to the new closed date
"""
return self.closed
self._closed = new_closed
def setLogbook(self, logbook):
"""
Set the logbook with list of clocked-in, clocked-out tuples for the entry
"""
self.logbook = logbook
def Logbook(self):
@property
def logbook(self):
"""
Return the logbook with all clocked-in, clocked-out date object pairs or empty list if nonexistent
"""
return self.logbook
return self._logbook
@logbook.setter
def logbook(self, new_logbook):
"""
Set the logbook with list of clocked-in, clocked-out tuples for the entry
"""
self._logbook = new_logbook
def __repr__(self):
"""
Print the level, heading text and tag of a node and the body
text as used to construct the node.
"""
# This method is not completed yet.
# Output heading line
n = ''
for _ in range(0, self.level):
for _ in range(0, self._level):
n = n + '*'
n = n + ' '
if self.todo:
n = n + self.todo + ' '
if self.prty:
n = n + '[#' + self.prty + '] '
n = n + self.headline
if self._todo:
n = n + self._todo + ' '
if self._priority:
n = n + '[#' + self._priority + '] '
n = n + self._heading
n = "%-60s " % n # hack - tags will start in column 62
closecolon = ''
for t in self.tags:
for t in self._tags:
n = n + ':' + t
closecolon = ':'
n = n + closecolon
n = n + "\n"
# Get body indentation from first line of body
indent = indent_regex.match(self.body).group()
indent = indent_regex.match(self._body).group()
# Output Closed Date, Scheduled Date, Deadline Date
if self.closed or self.scheduled or self.deadline:
if self._closed or self._scheduled or self._deadline:
n = n + indent
if self.closed:
n = n + f'CLOSED: [{self.closed.strftime("%Y-%m-%d %a")}] '
if self.scheduled:
n = n + f'SCHEDULED: <{self.scheduled.strftime("%Y-%m-%d %a")}> '
if self.deadline:
n = n + f'DEADLINE: <{self.deadline.strftime("%Y-%m-%d %a")}> '
if self.closed or self.scheduled or self.deadline:
if self._closed:
n = n + f'CLOSED: [{self._closed.strftime("%Y-%m-%d %a")}] '
if self._scheduled:
n = n + f'SCHEDULED: <{self._scheduled.strftime("%Y-%m-%d %a")}> '
if self._deadline:
n = n + f'DEADLINE: <{self._deadline.strftime("%Y-%m-%d %a")}> '
if self._closed or self._scheduled or self._deadline:
n = n + '\n'
# Output Property Drawer
n = n + indent + ":PROPERTIES:\n"
for key, value in self.properties.items():
for key, value in self._properties.items():
n = n + indent + f":{key}: {value}\n"
n = n + indent + ":END:\n"
n = n + self.body
# Output Body
if self.hasBody:
n = n + self._body
return n

View file

@ -182,8 +182,8 @@ def setup(text_to_jsonl, config: TextContentConfig, search_config: TextSearchCon
# Map notes in text files to (compressed) JSONL formatted file
config.compressed_jsonl = resolve_absolute_path(config.compressed_jsonl)
previous_entries = extract_entries(config.compressed_jsonl) if config.compressed_jsonl.exists() else None
entries_with_indices = text_to_jsonl(config.input_files, config.input_filter, config.compressed_jsonl, previous_entries)
previous_entries = extract_entries(config.compressed_jsonl) if config.compressed_jsonl.exists() and not regenerate else None
entries_with_indices = text_to_jsonl(config, previous_entries)
# Extract Updated Entries
entries = extract_entries(config.compressed_jsonl)

View file

@ -18,6 +18,7 @@ class TextContentConfig(ConfigBase):
input_filter: Optional[str]
compressed_jsonl: Path
embeddings_file: Path
index_heading_entries: Optional[bool] = False
@validator('input_filter')
def input_filter_or_files_required(cls, input_filter, values, **kwargs):

View file

@ -6,28 +6,33 @@ from src.processor.org_mode.org_to_jsonl import convert_org_entries_to_jsonl, co
from src.utils.helpers import is_none_or_empty
def test_entry_with_empty_body_line_to_jsonl(tmp_path):
'''Ensure entries with empty body are ignored.
def test_configure_heading_entry_to_jsonl(tmp_path):
'''Ensure entries with empty body are ignored, unless explicitly configured to index heading entries.
Property drawers not considered Body. Ignore control characters for evaluating if Body empty.'''
# Arrange
entry = f'''*** Heading
:PROPERTIES:
:ID: 42-42-42
:END:
\t\r
\t \r
'''
orgfile = create_file(tmp_path, entry)
# Act
# Extract Entries from specified Org files
entry_nodes, file_to_entries = extract_org_entries(org_files=[orgfile])
for index_heading_entries in [True, False]:
# Act
# Extract entries into jsonl from specified Org files
jsonl_string = convert_org_entries_to_jsonl(convert_org_nodes_to_entries(
*extract_org_entries(org_files=[orgfile]),
index_heading_entries=index_heading_entries))
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Process Each Entry from All Notes Files
entries = convert_org_nodes_to_entries(entry_nodes, file_to_entries)
jsonl_data = convert_org_entries_to_jsonl(entries)
# Assert
assert is_none_or_empty(jsonl_data)
# Assert
if index_heading_entries:
# Entry with empty body indexed when index_heading_entries set to True
assert len(jsonl_data) == 1
else:
# Entry with empty body ignored when index_heading_entries set to False
assert is_none_or_empty(jsonl_data)
def test_entry_with_body_to_jsonl(tmp_path):

View file

@ -1,7 +1,5 @@
# Standard Packages
import datetime
from os.path import relpath
from pathlib import Path
# Internal Packages
from src.processor.org_mode import orgnode
@ -20,14 +18,14 @@ def test_parse_entry_with_no_headings(tmp_path):
# Assert
assert len(entries) == 1
assert entries[0].Heading() == f'{orgfile}'
assert entries[0].Tags() == list()
assert entries[0].Body() == "Body Line 1"
assert entries[0].Priority() == ""
assert entries[0].heading == f'{orgfile}'
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1"
assert entries[0].priority == ""
assert entries[0].Property("ID") == ""
assert entries[0].Closed() == ""
assert entries[0].Scheduled() == ""
assert entries[0].Deadline() == ""
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
# ----------------------------------------------------------------------------------------------------
@ -44,14 +42,14 @@ Body Line 1'''
# Assert
assert len(entries) == 1
assert entries[0].Heading() == "Heading"
assert entries[0].Tags() == list()
assert entries[0].Body() == "Body Line 1"
assert entries[0].Priority() == ""
assert entries[0].heading == "Heading"
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1"
assert entries[0].priority == ""
assert entries[0].Property("ID") == ""
assert entries[0].Closed() == ""
assert entries[0].Scheduled() == ""
assert entries[0].Deadline() == ""
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
# ----------------------------------------------------------------------------------------------------
@ -77,16 +75,44 @@ Body Line 2'''
# Assert
assert len(entries) == 1
assert entries[0].Heading() == "Heading"
assert entries[0].Todo() == "DONE"
assert entries[0].Tags() == ["Tag1", "TAG2", "tag3"]
assert entries[0].Body() == "- Clocked Log 1\nBody Line 1\nBody Line 2"
assert entries[0].Priority() == "A"
assert entries[0].heading == "Heading"
assert entries[0].todo == "DONE"
assert entries[0].tags == ["Tag1", "TAG2", "tag3"]
assert entries[0].body == "- Clocked Log 1\nBody Line 1\nBody Line 2"
assert entries[0].priority == "A"
assert entries[0].Property("ID") == "id:123-456-789-4234-1231"
assert entries[0].Closed() == datetime.date(1984,4,1)
assert entries[0].Scheduled() == datetime.date(1984,4,1)
assert entries[0].Deadline() == datetime.date(1984,4,1)
assert entries[0].Logbook() == [(datetime.datetime(1984,4,1,9,0,0), datetime.datetime(1984,4,1,12,0,0))]
assert entries[0].closed == datetime.date(1984,4,1)
assert entries[0].scheduled == datetime.date(1984,4,1)
assert entries[0].deadline == datetime.date(1984,4,1)
assert entries[0].logbook == [(datetime.datetime(1984,4,1,9,0,0), datetime.datetime(1984,4,1,12,0,0))]
# ----------------------------------------------------------------------------------------------------
def test_render_entry_with_property_drawer_and_empty_body(tmp_path):
"Render heading entry with property drawer"
# Arrange
entry_to_render = f'''
*** [#A] Heading1 :tag1:
:PROPERTIES:
:ID: 111-111-111-1111-1111
:END:
\t\r \n
'''
orgfile = create_file(tmp_path, entry_to_render)
expected_entry = f'''*** [#A] Heading1 :tag1:
:PROPERTIES:
:LINE: file:{orgfile}::2
:ID: id:111-111-111-1111-1111
:SOURCE: [[file:{orgfile}::*Heading1]]
:END:
'''
# Act
parsed_entries = orgnode.makelist(orgfile)
# Assert
assert f'{parsed_entries[0]}' == expected_entry
# ----------------------------------------------------------------------------------------------------
@ -109,7 +135,7 @@ Body Line 2
# Assert
# SOURCE link rendered with Heading
assert f':SOURCE: [[file:{orgfile}::*{entries[0].Heading()}]]' in f'{entries[0]}'
assert f':SOURCE: [[file:{orgfile}::*{entries[0].heading}]]' in f'{entries[0]}'
# ID link rendered with ID
assert f':ID: id:123-456-789-4234-1231' in f'{entries[0]}'
# LINE link rendered with line number
@ -134,7 +160,7 @@ Body Line 1'''
# Assert
assert len(entries) == 1
# parsed heading from entry
assert entries[0].Heading() == "Heading[1]"
assert entries[0].heading == "Heading[1]"
# ensure SOURCE link has square brackets in filename, heading escaped in rendered entries
escaped_orgfile = f'{orgfile}'.replace("[1]", "\\[1\\]")
assert f':SOURCE: [[file:{escaped_orgfile}::*Heading\[1\]' in f'{entries[0]}'
@ -176,16 +202,16 @@ Body 2
# Assert
assert len(entries) == 2
for index, entry in enumerate(entries):
assert entry.Heading() == f"Heading{index+1}"
assert entry.Todo() == "FAILED" if index == 0 else "CANCELLED"
assert entry.Tags() == [f"tag{index+1}"]
assert entry.Body() == f"- Clocked Log {index+1}\nBody {index+1}\n\n"
assert entry.Priority() == "A"
assert entry.heading == f"Heading{index+1}"
assert entry.todo == "FAILED" if index == 0 else "CANCELLED"
assert entry.tags == [f"tag{index+1}"]
assert entry.body == f"- Clocked Log {index+1}\nBody {index+1}\n\n"
assert entry.priority == "A"
assert entry.Property("ID") == f"id:123-456-789-4234-000{index+1}"
assert entry.Closed() == datetime.date(1984,4,index+1)
assert entry.Scheduled() == datetime.date(1984,4,index+1)
assert entry.Deadline() == datetime.date(1984,4,index+1)
assert entry.Logbook() == [(datetime.datetime(1984,4,index+1,9,0,0), datetime.datetime(1984,4,index+1,12,0,0))]
assert entry.closed == datetime.date(1984,4,index+1)
assert entry.scheduled == datetime.date(1984,4,index+1)
assert entry.deadline == datetime.date(1984,4,index+1)
assert entry.logbook == [(datetime.datetime(1984,4,index+1,9,0,0), datetime.datetime(1984,4,index+1,12,0,0))]
# ----------------------------------------------------------------------------------------------------
@ -201,14 +227,14 @@ Body Line 1'''
# Assert
assert len(entries) == 1
assert entries[0].Heading() == f'{orgfile}'
assert entries[0].Tags() == list()
assert entries[0].Body() == "Body Line 1"
assert entries[0].Priority() == ""
assert entries[0].heading == f'{orgfile}'
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1"
assert entries[0].priority == ""
assert entries[0].Property("ID") == ""
assert entries[0].Closed() == ""
assert entries[0].Scheduled() == ""
assert entries[0].Deadline() == ""
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
# ----------------------------------------------------------------------------------------------------
@ -224,14 +250,14 @@ Body Line 1'''
# Assert
assert len(entries) == 1
assert entries[0].Heading() == 'test'
assert entries[0].Tags() == list()
assert entries[0].Body() == "Body Line 1"
assert entries[0].Priority() == ""
assert entries[0].heading == 'test'
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1"
assert entries[0].priority == ""
assert entries[0].Property("ID") == ""
assert entries[0].Closed() == ""
assert entries[0].Scheduled() == ""
assert entries[0].Deadline() == ""
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
# ----------------------------------------------------------------------------------------------------
@ -248,14 +274,14 @@ Body Line 1
# Assert
assert len(entries) == 1
assert entries[0].Heading() == 'title1 title2'
assert entries[0].Tags() == list()
assert entries[0].Body() == "Body Line 1\n"
assert entries[0].Priority() == ""
assert entries[0].heading == 'title1 title2'
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1\n"
assert entries[0].priority == ""
assert entries[0].Property("ID") == ""
assert entries[0].Closed() == ""
assert entries[0].Scheduled() == ""
assert entries[0].Deadline() == ""
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
# Helper Functions

View file

@ -13,6 +13,20 @@ from src.processor.org_mode.org_to_jsonl import org_to_jsonl
# Test
# ----------------------------------------------------------------------------------------------------
def test_asymmetric_setup_with_missing_file_raises_error(content_config: ContentConfig, search_config: SearchConfig):
# Arrange
file_to_index = Path(content_config.org.input_filter).parent / "new_file_to_index.org"
new_org_content_config = deepcopy(content_config.org)
new_org_content_config.input_files = [f'{file_to_index}']
new_org_content_config.input_filter = None
# Act
# Generate notes embeddings during asymmetric setup
with pytest.raises(FileNotFoundError):
text_search.setup(org_to_jsonl, new_org_content_config, search_config.asymmetric, regenerate=True)
# ----------------------------------------------------------------------------------------------------
def test_asymmetric_setup_with_empty_file_raises_error(content_config: ContentConfig, search_config: SearchConfig):
# Arrange