Sort extracted images before computing their embeddings

- Image order returned by glob is OS-dependent
- This prevented sharing image embeddings across machines running different operating systems
- A stable sort order for processed images allows sharing embeddings
  across machines.
- Use case:
  A more powerful, always-on machine actually computes the image embeddings regularly.
  The client machine just loads these periodically to provide semantic search functionality.
This commit is contained in:
Debanjum Singh Solanky 2022-07-20 03:51:27 +04:00
parent c4c7f38b15
commit d68a9dc445

View file

@ -40,7 +40,7 @@ def extract_entries(image_directories, verbose=0):
if verbose > 0:
image_directory_names = ', '.join([str(image_directory) for image_directory in image_directories])
print(f'Found {len(image_names)} images in {image_directory_names}')
return image_names
return sorted(image_names)
def compute_embeddings(image_names, encoder, embeddings_file, batch_size=50, use_xmp_metadata=False, regenerate=False, verbose=0):