mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-30 10:53:02 +01:00
Fix stripping empty escape sequences from strings
- Fix log message on jsonl write
This commit is contained in:
parent
a7cf6c8458
commit
2e1bbe0cac
4 changed files with 5 additions and 4 deletions
|
@ -88,7 +88,7 @@ def extract_beancount_transactions(beancount_files):
|
|||
|
||||
# Initialize Regex for extracting Beancount Entries
|
||||
transaction_regex = r'^\n?\d{4}-\d{2}-\d{2} [\*|\!] '
|
||||
empty_newline = f'^[{empty_escape_sequences}]*$'
|
||||
empty_newline = f'^[\n\r\t\ ]*$'
|
||||
|
||||
entries = []
|
||||
transaction_to_file_map = []
|
||||
|
|
|
@ -97,7 +97,8 @@ def extract_markdown_entries(markdown_files):
|
|||
markdown_content = f.read()
|
||||
markdown_entries_per_file = [f'#{entry.strip(empty_escape_sequences)}'
|
||||
for entry
|
||||
in re.split(markdown_heading_regex, markdown_content, flags=re.MULTILINE)]
|
||||
in re.split(markdown_heading_regex, markdown_content, flags=re.MULTILINE)
|
||||
if entry.strip(empty_escape_sequences) != '']
|
||||
entry_to_file_map += zip(markdown_entries_per_file, [markdown_file]*len(markdown_entries_per_file))
|
||||
entries.extend(markdown_entries_per_file)
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ from pathlib import Path
|
|||
|
||||
app_root_directory = Path(__file__).parent.parent.parent
|
||||
web_directory = app_root_directory / 'src/interface/web/'
|
||||
empty_escape_sequences = r'\n|\r\t '
|
||||
empty_escape_sequences = '\n|\r|\t| '
|
||||
|
||||
# default app config to use
|
||||
default_config = {
|
||||
|
|
|
@ -54,4 +54,4 @@ def compress_jsonl_data(jsonl_data, output_path):
|
|||
with gzip.open(output_path, 'wt') as gzip_file:
|
||||
gzip_file.write(jsonl_data)
|
||||
|
||||
logger.info(f'Wrote {len(jsonl_data)} lines to gzip compressed jsonl at {output_path}')
|
||||
logger.info(f'Wrote jsonl data to gzip compressed jsonl at {output_path}')
|
Loading…
Reference in a new issue