mirror of
https://github.com/khoj-ai/khoj.git
synced 2025-02-17 08:04:21 +00:00
Extract empty line escape sequences var into constants file for reuse
This commit is contained in:
parent
624a3faf92
commit
3d8a07f252
2 changed files with 5 additions and 4 deletions
|
@ -11,6 +11,7 @@ import re
|
|||
# Internal Packages
|
||||
from src.processor.org_mode import orgnode
|
||||
from src.utils.helpers import get_absolute_path, is_none_or_empty
|
||||
from src.utils.constants import empty_escape_sequences
|
||||
|
||||
|
||||
# Define Functions
|
||||
|
@ -62,7 +63,6 @@ def load_jsonl(input_path, verbose=0):
|
|||
# Initialize Variables
|
||||
data = []
|
||||
jsonl_file = None
|
||||
escape_sequences = '\n|\r\t '
|
||||
|
||||
# Open JSONL file
|
||||
if input_path.suffix == ".gz":
|
||||
|
@ -72,7 +72,7 @@ def load_jsonl(input_path, verbose=0):
|
|||
|
||||
# Read JSONL file
|
||||
for line in jsonl_file:
|
||||
data.append(json.loads(line.strip(escape_sequences)))
|
||||
data.append(json.loads(line.strip(empty_escape_sequences)))
|
||||
|
||||
# Close JSONL file
|
||||
jsonl_file.close()
|
||||
|
@ -114,13 +114,13 @@ def extract_beancount_entries(beancount_files):
|
|||
|
||||
# Initialize Regex for extracting Beancount Entries
|
||||
date_regex = r'^\n?\d{4}-\d{2}-\d{2}'
|
||||
empty_newline = r'^[\n\r\t ]*$'
|
||||
empty_newline = f'^[{empty_escape_sequences}]*$'
|
||||
|
||||
entries = []
|
||||
for beancount_file in beancount_files:
|
||||
with open(beancount_file) as f:
|
||||
ledger_content = f.read()
|
||||
entries.extend([entry.strip('\n|\r|\t| ')
|
||||
entries.extend([entry.strip(empty_escape_sequences)
|
||||
for entry
|
||||
in re.split(empty_newline, ledger_content, flags=re.MULTILINE)
|
||||
if re.match(date_regex, entry)])
|
||||
|
|
1
src/utils/constants.py
Normal file
1
src/utils/constants.py
Normal file
|
@ -0,0 +1 @@
|
|||
# Set of characters treated as "empty" padding around a line or entry:
# newline, pipe, carriage return, tab and space.
#
# This must be a regular (non-raw) string. str.strip() interprets its argument
# as a literal set of characters, so a raw string would strip backslashes and
# the letters n, r and t instead of the whitespace characters themselves.
# The real control characters remain valid when the constant is interpolated
# into a regex character class such as f'^[{empty_escape_sequences}]*$'.
#
# NOTE(review): the `|` looks like a leftover separator rather than a character
# that was meant to be stripped; it is kept here so behavior stays unchanged.
# Confirm before removing it.
empty_escape_sequences = '\n|\r\t '
|
Loading…
Add table
Reference in a new issue