diff --git a/README.org b/README.org
index 20f3301a..478fba82 100644
--- a/README.org
+++ b/README.org
@@ -50,8 +50,9 @@
        #+end_src
 
 **** 3. Configure
-     - Configure application search types and their underlying data source/files in ~sample_config.yml~
-     - Use the ~sample_config.yml~ as reference
+     - Configure files/directories to search in ~content-type~ section of ~sample_config.yml~
+     - To run the application on test data, update file paths containing ~/data/~ to ~tests/data/~ in ~sample_config.yml~
+       - Example: replace ~/data/notes/*.org~ with ~tests/data/notes/*.org~
 
 **** 4. Run
      Load ML model, generate embeddings and expose API to query notes, images, transactions etc specified in config YAML
diff --git a/docker-compose.yml b/docker-compose.yml
index c1115d87..e9b1dbfb 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -20,13 +20,14 @@ services:
       #   points to the files you want to index.
       # The path of the mounted directory (right hand side),
       #   must match the path prefix in your config file.
-      - ./tests/data/:/data/notes/
-      - ./tests/data/:/data/images/
-      - ./tests/data/:/data/ledger/
-      - ./tests/data/:/data/music/
-      # It's ok if you don't have existing embeddings. 
-      # You can set this volume to point to an empty folder.
+      - ./tests/data/notes/:/data/notes/
+      - ./tests/data/images/:/data/images/
+      - ./tests/data/ledger/:/data/ledger/
+      - ./tests/data/music/:/data/music/
+      # Embeddings and models are populated after the first run.
+      # You can set these volumes to point to empty directories on the host.
       - ./tests/data/embeddings/:/data/generated/
+      - ./tests/data/models/:/data/models/
 
     # Use 0.0.0.0 to explicitly set the host ip for the service on the container. https://pythonspeed.com/articles/docker-connection-refused/
-    command: --host="0.0.0.0" --port=8000 -c=docker_sample_config.yml -vv
+    command: --host="0.0.0.0" --port=8000 -c=sample_config.yml -vv
diff --git a/docker_sample_config.yml b/docker_sample_config.yml
deleted file mode 100644
index e9e23b75..00000000
--- a/docker_sample_config.yml
+++ /dev/null
@@ -1,47 +0,0 @@
-content-type:
-  # The /data/folder/ prefix to the folders is here because this is 
-  # the directory to which the local files are copied in the docker-compose.
-  # If changing, the docker-compose volumes should also be changed to match.
-  org:
-    input-files: null
-    input-filter: "/data/notes/*.org"
-    compressed-jsonl: "/data/generated/.notes.json.gz"
-    embeddings-file: "/data/generated/.note_embeddings.pt"
-
-  ledger:
-    input-files: null
-    input-filter: /data/ledger/*.beancount
-    compressed-jsonl: /data/generated/.transactions.jsonl.gz
-    embeddings-file: /data/generated/.transaction_embeddings.pt
-
-  image:
-    input-directory: "/data/images/"
-    embeddings-file: "/data/generated/.image_embeddings.pt"
-    batch-size: 50
-    use-xmp-metadata: true
-
-  music:
-    input-files: null
-    input-filter: "/data/music/*.org"
-    compressed-jsonl: "/data/generated/.songs.jsonl.gz"
-    embeddings-file: "/data/generated/.song_embeddings.pt"
-
-search-type:
-  symmetric:
-    encoder: "sentence-transformers/paraphrase-MiniLM-L6-v2"
-    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
-    model_directory: "/data/models/.symmetric"
-
-  asymmetric:
-    encoder: "sentence-transformers/msmarco-MiniLM-L-6-v3"
-    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
-    model_directory: "/data/models/.asymmetric"
-
-  image:
-    encoder: "clip-ViT-B-32"
-    model_directory: "/data/models/.image_encoder"
-
-processor:
-  conversation:
-    openai-api-key: null
-    conversation-logfile: "/data/conversation/.conversation_logs.json"
\ No newline at end of file
diff --git a/sample_config.yml b/sample_config.yml
index 8805c984..b16759e8 100644
--- a/sample_config.yml
+++ b/sample_config.yml
@@ -1,44 +1,47 @@
 content-type:
+  # The /data/<folder>/ prefix on the paths below matches the container
+  # directories onto which docker-compose mounts the local files.
+  # If you change it, change the docker-compose volumes to match.
   org:
-    input-files: ["tests/data/main_readme.org", "tests/data/interface_emacs_readme.org"]
-    input-filter: null
-    compressed-jsonl: "tests/data/.notes.json.gz"
-    embeddings-file: "tests/data/.note_embeddings.pt"
+    input-files: null
+    input-filter: "/data/notes/*.org"
+    compressed-jsonl: "/data/generated/notes.json.gz"
+    embeddings-file: "/data/generated/note_embeddings.pt"
 
   ledger:
     input-files: null
-    input-filter: tests/data/*.beancount
-    compressed-jsonl: tests/data/.transactions.jsonl.gz
-    embeddings-file: tests/data/.transaction_embeddings.pt
+    input-filter: /data/ledger/*.beancount
+    compressed-jsonl: /data/generated/transactions.jsonl.gz
+    embeddings-file: /data/generated/transaction_embeddings.pt
 
   image:
-    input-directory: "tests/data"
-    embeddings-file: "tests/data/.image_embeddings.pt"
+    input-directory: "/data/images/"
+    embeddings-file: "/data/generated/image_embeddings.pt"
     batch-size: 50
-    use-xmp-metadata: false
+    use-xmp-metadata: true
 
   music:
-    input-files: ["tests/data/music.org"]
+    input-files: ["/data/music/music.org"]
     input-filter: null
-    compressed-jsonl: "tests/data/.songs.jsonl.gz"
-    embeddings-file: "tests/data/.song_embeddings.pt"
+    compressed-jsonl: "/data/generated/songs.jsonl.gz"
+    embeddings-file: "/data/generated/song_embeddings.pt"
 
 search-type:
   symmetric:
     encoder: "sentence-transformers/paraphrase-MiniLM-L6-v2"
     cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
-    model_directory: "tests/data/.symmetric"
+    model_directory: "/data/models/symmetric"
 
   asymmetric:
     encoder: "sentence-transformers/msmarco-MiniLM-L-6-v3"
     cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
-    model_directory: "tests/data/.asymmetric"
+    model_directory: "/data/models/asymmetric"
 
   image:
     encoder: "clip-ViT-B-32"
-    model_directory: "tests/data/.image_encoder"
+    model_directory: "/data/models/image_encoder"
 
 processor:
   conversation:
     openai-api-key: null
-    conversation-logfile: "tests/data/.conversation_logs.json"
\ No newline at end of file
+    conversation-logfile: "/data/generated/conversation_logs.json"
\ No newline at end of file