Improve test data organization and update corresponding conftests

- Put test data for each content type into separate directories
- Make config.yml for docker and local host consistent
  - Prepending tests to /data in sample_config.yml makes the application
    run on local host using test data
  - Allows mounting separate volume for each content type in docker-compose
- Update gitignore to only add test content, not generated models or embeddings
This commit is contained in:
Debanjum Singh Solanky 2022-01-29 01:57:08 -05:00
parent 3e889760c7
commit 79c2224eaa
9 changed files with 15 additions and 14 deletions

3
.gitignore vendored
View file

@ -1,7 +1,8 @@
__pycache__ __pycache__
.DS_Store .DS_Store
.emacs.desktop* .emacs.desktop*
tests/data/.* tests/data/models
tests/data/embeddings
src/.data src/.data
.vscode .vscode
*.gz *.gz

View file

@ -39,8 +39,8 @@ def model_dir(search_config):
# Generate Image Embeddings from Test Images # Generate Image Embeddings from Test Images
content_config = ContentConfig() content_config = ContentConfig()
content_config.image = ImageContentConfig( content_config.image = ImageContentConfig(
input_directory = 'tests/data', input_directory = 'tests/data/images',
embeddings_file = model_dir.joinpath('.image_embeddings.pt'), embeddings_file = model_dir.joinpath('image_embeddings.pt'),
batch_size = 10, batch_size = 10,
use_xmp_metadata = False) use_xmp_metadata = False)
@ -48,10 +48,10 @@ def model_dir(search_config):
# Generate Notes Embeddings from Test Notes # Generate Notes Embeddings from Test Notes
content_config.org = TextContentConfig( content_config.org = TextContentConfig(
input_files = ['tests/data/main_readme.org', 'tests/data/interface_emacs_readme.org'], input_files = None,
input_filter = None, input_filter = 'tests/data/notes/*.org',
compressed_jsonl = model_dir.joinpath('.notes.jsonl.gz'), compressed_jsonl = model_dir.joinpath('notes.jsonl.gz'),
embeddings_file = model_dir.joinpath('.note_embeddings.pt')) embeddings_file = model_dir.joinpath('note_embeddings.pt'))
asymmetric.setup(content_config.org, search_config.asymmetric, regenerate=False, verbose=True) asymmetric.setup(content_config.org, search_config.asymmetric, regenerate=False, verbose=True)
@ -62,14 +62,14 @@ def model_dir(search_config):
def content_config(model_dir): def content_config(model_dir):
content_config = ContentConfig() content_config = ContentConfig()
content_config.org = TextContentConfig( content_config.org = TextContentConfig(
input_files = ['tests/data/main_readme.org', 'tests/data/interface_emacs_readme.org'], input_files = None,
input_filter = None, input_filter = 'tests/data/notes/*.org',
compressed_jsonl = model_dir.joinpath('.notes.jsonl.gz'), compressed_jsonl = model_dir.joinpath('notes.jsonl.gz'),
embeddings_file = model_dir.joinpath('.note_embeddings.pt')) embeddings_file = model_dir.joinpath('note_embeddings.pt'))
content_config.image = ImageContentConfig( content_config.image = ImageContentConfig(
input_directory = 'tests/data', input_directory = 'tests/data/images',
embeddings_file = model_dir.joinpath('.image_embeddings.pt'), embeddings_file = model_dir.joinpath('image_embeddings.pt'),
batch_size = 10, batch_size = 10,
use_xmp_metadata = False) use_xmp_metadata = False)

View file

Before

Width:  |  Height:  |  Size: 170 KiB

After

Width:  |  Height:  |  Size: 170 KiB

View file

Before

Width:  |  Height:  |  Size: 330 KiB

After

Width:  |  Height:  |  Size: 330 KiB

View file

Before

Width:  |  Height:  |  Size: 268 KiB

After

Width:  |  Height:  |  Size: 268 KiB