mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Allow importing org-to-jsonl as module for reuse
To allow importing org-to-jsonl as a module:
- Wrap the code in __main__ into an org_to_jsonl function
- Rename processor/org-mode to processor/org_mode
- Add __init__.py to the processor directory
This commit is contained in:
parent
5f8221f77e
commit
19d6678eb1
4 changed files with 21 additions and 15 deletions
0
processor/org_mode/__init__.py
Normal file
0
processor/org_mode/__init__.py
Normal file
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
# Import Modules
|
# Import Modules
|
||||||
import orgnode
|
import processor.org_mode.orgnode
|
||||||
import json
|
import json
|
||||||
import argparse
|
import argparse
|
||||||
import pathlib
|
import pathlib
|
||||||
|
@ -10,6 +10,24 @@ import gzip
|
||||||
|
|
||||||
|
|
||||||
# Define Functions
|
# Define Functions
|
||||||
|
def org_to_jsonl(org_files, org_file_filter, output_path, verbose=False):
    """Map notes in Org-Mode files to a (compressed) JSONL formatted file.

    Uses only its own arguments (never the command-line ``args`` global),
    so it can be called after importing this module.

    Args:
        org_files: Org files to process; forwarded to ``get_org_files``.
        org_file_filter: Filter used to select Org files; forwarded to
            ``get_org_files``.
        output_path: Destination file. A ".gz" suffix writes compressed
            JSONL, a ".jsonl" suffix writes plain JSONL; for any other
            suffix nothing is written.
        verbose: Verbosity forwarded to the conversion and writer helpers.

    Returns:
        The entries extracted from the selected Org files.
    """
    # Accept plain string paths as well as pathlib.Path objects
    output_path = pathlib.Path(output_path)

    # Get Org Files to Process
    org_files = get_org_files(org_files, org_file_filter)

    # Extract Entries from specified Org files
    entries = extract_org_entries(org_files)

    # Process Each Entry from All Notes Files
    jsonl_data = convert_org_entries_to_jsonl(entries, verbose=verbose)

    # Write JSONL formatted Data, compressed when a .gz path is requested
    if output_path.suffix == ".gz":
        compress_jsonl_data(jsonl_data, output_path, verbose=verbose)
    elif output_path.suffix == ".jsonl":
        dump_jsonl(jsonl_data, output_path, verbose=verbose)

    return entries
|
||||||
|
|
||||||
def dump_jsonl(jsonl_data, output_path, verbose=0):
|
def dump_jsonl(jsonl_data, output_path, verbose=0):
|
||||||
"Write List of JSON objects to JSON line file"
|
"Write List of JSON objects to JSON line file"
|
||||||
with open(get_absolute_path(output_path), 'w', encoding='utf-8') as f:
|
with open(get_absolute_path(output_path), 'w', encoding='utf-8') as f:
|
||||||
|
@ -123,17 +141,5 @@ if __name__ == '__main__':
|
||||||
print("At least one of org-files or org-file-filter is required to be specified")
|
print("At least one of org-files or org-file-filter is required to be specified")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
# Get Org Files to Process
|
# Map notes in Org-Mode files to (compressed) JSONL formatted file
|
||||||
org_files = get_org_files(args.input_files, args.input_filter)
|
org_to_jsonl(args.input_files, args.input_filter, args.output_file, args.verbose)
|
||||||
|
|
||||||
# Extract Entries from specified Org files
|
|
||||||
entries = extract_org_entries(org_files)
|
|
||||||
|
|
||||||
# Process Each Entry from All Notes Files
|
|
||||||
jsonl_data = convert_org_entries_to_jsonl(entries, verbose=args.verbose)
|
|
||||||
|
|
||||||
# Compress JSONL formatted Data
|
|
||||||
if args.output_file.suffix == ".gz":
|
|
||||||
compress_jsonl_data(jsonl_data, args.output_file, verbose=args.verbose)
|
|
||||||
elif args.output_file.suffix == ".jsonl":
|
|
||||||
dump_jsonl(jsonl_data, args.output_file, verbose=args.verbose)
|
|
Loading…
Reference in a new issue