2022-09-05 00:05:13 +02:00
# External Packages
2023-03-15 21:26:19 +01:00
import os
2023-03-01 02:26:06 +01:00
from fastapi . testclient import TestClient
2023-01-09 20:17:36 +01:00
from pathlib import Path
2021-10-03 04:46:29 +02:00
import pytest
2023-10-15 04:39:13 +02:00
from fastapi . staticfiles import StaticFiles
2023-10-26 18:42:29 +02:00
from fastapi import FastAPI
import os
from fastapi import FastAPI
2023-11-15 01:56:26 +01:00
2021-10-03 04:46:29 +02:00
# Internal Packages
2023-10-26 20:37:41 +02:00
from khoj . configure import configure_routes , configure_search_types , configure_middleware
2023-11-15 01:56:26 +01:00
from khoj . processor . embeddings import CrossEncoderModel , EmbeddingsModel
2023-11-17 02:19:55 +01:00
from khoj . processor . data_sources . plaintext . plaintext_to_entries import PlaintextToEntries
2023-02-14 21:50:51 +01:00
from khoj . search_type import image_search , text_search
2023-07-22 09:28:14 +02:00
from khoj . utils . config import SearchModels
2023-10-15 04:39:13 +02:00
from khoj . utils . constants import web_directory
2023-02-14 21:50:51 +01:00
from khoj . utils . helpers import resolve_absolute_path
2023-02-17 17:04:26 +01:00
from khoj . utils . rawconfig import (
ContentConfig ,
ImageContentConfig ,
SearchConfig ,
ImageSearchConfig ,
)
2023-09-18 23:41:26 +02:00
from khoj . utils import state , fs_syncer
from khoj . routers . indexer import configure_content
2023-11-17 02:19:55 +01:00
from khoj . processor . data_sources . org_mode . org_to_entries import OrgToEntries
2023-10-26 18:42:29 +02:00
from database . models import (
2023-10-26 21:33:03 +02:00
KhojApiUser ,
2023-10-26 18:42:29 +02:00
LocalOrgConfig ,
LocalMarkdownConfig ,
LocalPlaintextConfig ,
GithubConfig ,
KhojUser ,
GithubRepoConfig ,
)
2023-10-26 20:37:41 +02:00
from tests . helpers import (
UserFactory ,
2023-11-02 18:43:27 +01:00
ChatModelOptionsFactory ,
2023-10-26 20:37:41 +02:00
OpenAIProcessorConversationConfigFactory ,
OfflineChatProcessorConversationConfigFactory ,
2023-11-02 18:43:27 +01:00
UserConversationProcessorConfigFactory ,
2023-11-11 07:38:28 +01:00
SubscriptionFactory ,
2023-10-26 20:37:41 +02:00
)
2023-10-26 18:42:29 +02:00
@pytest.fixture(autouse=True)
def enable_db_access_for_all_tests(db):
    """Request the ``db`` fixture for every test.

    The fixture is ``autouse`` and does nothing itself; its only purpose is
    the ``db`` dependency in its signature (presumably pytest-django's
    database fixture -- confirm against the project's test plugins).
    """
    return None
2023-02-17 17:04:26 +01:00
@pytest.fixture(scope="session")
def search_config() -> SearchConfig:
    """Build the search configuration shared by the whole test session.

    Side effects:
        * assigns new ``EmbeddingsModel`` and ``CrossEncoderModel`` instances
          to the shared ``state`` module;
        * creates the ``~/.khoj/search`` model directory if it is missing.

    Returns:
        A ``SearchConfig`` whose ``image`` attribute is populated.
    """
    state.embeddings_model = EmbeddingsModel()
    state.cross_encoder_model = CrossEncoderModel()

    # Resolve and create the on-disk location for search models.
    models_root = resolve_absolute_path("~/.khoj/search")
    models_root.mkdir(parents=True, exist_ok=True)

    image_settings = ImageSearchConfig(
        encoder="sentence-transformers/clip-ViT-B-32",
        model_directory=models_root / "image/",
        encoder_type=None,
    )

    config = SearchConfig()
    config.image = image_settings
    return config
2023-10-26 18:42:29 +02:00
@pytest.fixture
def default_user():
    """Create a factory-built user that has a subscription attached.

    ``pytest.mark.django_db`` is intentionally not applied: pytest ignores
    marks placed on fixture functions (and deprecates doing so). Database
    access is requested through the autouse ``enable_db_access_for_all_tests``
    fixture in this file.

    Returns:
        The user produced by ``UserFactory``.
    """
    user = UserFactory()
    SubscriptionFactory(user=user)
    return user
2023-10-26 18:42:29 +02:00
2023-10-26 19:17:29 +02:00
@pytest.fixture
def default_user2():
    """Return the ``KhojUser`` named ``default``, creating it when absent.

    A newly created user also gets a subscription via ``SubscriptionFactory``;
    an already existing user is returned untouched.

    ``pytest.mark.django_db`` is intentionally not applied: pytest ignores
    marks placed on fixture functions (and deprecates doing so). Database
    access is requested through the autouse ``enable_db_access_for_all_tests``
    fixture in this file.
    """
    matching_users = KhojUser.objects.filter(username="default")
    if matching_users.exists():
        return matching_users.get()

    user = KhojUser.objects.create(
        username="default",
        email="default@example.com",
        password="default",
    )
    SubscriptionFactory(user=user)
    return user
2023-10-26 19:17:29 +02:00
2023-11-10 23:00:58 +01:00
@pytest.fixture
def default_user3():
    """Return the ``KhojUser`` named ``default3``, creating it when absent.

    This user should not have any data associated with it. A newly created
    user still gets a subscription via ``SubscriptionFactory``.

    ``pytest.mark.django_db`` is intentionally not applied: pytest ignores
    marks placed on fixture functions (and deprecates doing so). Database
    access is requested through the autouse ``enable_db_access_for_all_tests``
    fixture in this file.
    """
    matching_users = KhojUser.objects.filter(username="default3")
    if matching_users.exists():
        return matching_users.get()

    user = KhojUser.objects.create(
        username="default3",
        email="default3@example.com",
        password="default3",
    )
    SubscriptionFactory(user=user)
    return user
2023-11-10 23:00:58 +01:00
2023-10-26 21:33:03 +02:00
@pytest.fixture
def api_user(default_user):
    """Return the ``KhojApiUser`` for ``default_user``, creating it when absent.

    A newly created record is named ``api-key`` and carries the token
    ``kk-secret``.

    ``pytest.mark.django_db`` is intentionally not applied: pytest ignores
    marks placed on fixture functions (and deprecates doing so). Database
    access is requested through the autouse ``enable_db_access_for_all_tests``
    fixture in this file.
    """
    existing_keys = KhojApiUser.objects.filter(user=default_user)
    if existing_keys.exists():
        return existing_keys.get()

    return KhojApiUser.objects.create(
        user=default_user,
        name="api-key",
        token="kk-secret",
    )
2023-11-04 22:29:30 +01:00
@pytest.fixture
def api_user2(default_user2):
    """Return the ``KhojApiUser`` for ``default_user2``, creating it when absent.

    A newly created record is named ``api-key`` and carries the token
    ``kk-diff-secret``.

    ``pytest.mark.django_db`` is intentionally not applied: pytest ignores
    marks placed on fixture functions (and deprecates doing so). Database
    access is requested through the autouse ``enable_db_access_for_all_tests``
    fixture in this file.
    """
    existing_keys = KhojApiUser.objects.filter(user=default_user2)
    if existing_keys.exists():
        return existing_keys.get()

    return KhojApiUser.objects.create(
        user=default_user2,
        name="api-key",
        token="kk-diff-secret",
    )
2023-11-10 23:00:58 +01:00
@pytest.fixture
def api_user3(default_user3):
    """Return the ``KhojApiUser`` for ``default_user3``, creating it when absent.

    A newly created record is named ``api-key`` and carries the token
    ``kk-diff-secret-3``.

    ``pytest.mark.django_db`` is intentionally not applied: pytest ignores
    marks placed on fixture functions (and deprecates doing so). Database
    access is requested through the autouse ``enable_db_access_for_all_tests``
    fixture in this file.
    """
    existing_keys = KhojApiUser.objects.filter(user=default_user3)
    if existing_keys.exists():
        return existing_keys.get()

    return KhojApiUser.objects.create(
        user=default_user3,
        name="api-key",
        token="kk-diff-secret-3",
    )
2023-02-17 17:04:26 +01:00
@pytest.fixture(scope="session")
def search_models(search_config: SearchConfig):
    """Create the session-wide ``SearchModels`` container.

    Only ``image_search`` is populated, using the model initialised from
    ``search_config.image``.
    """
    models = SearchModels()
    models.image_search = image_search.initialize_model(search_config.image)
    return models
2023-10-26 18:42:29 +02:00
@pytest.fixture
def anyio_backend():
    """Name the async backend for the tests.

    Presumably read by the anyio pytest plugin to select the event loop
    implementation -- confirm against the plugin in use.
    """
    backend_name = "asyncio"
    return backend_name
@pytest.fixture(scope="function")
def content_config(tmp_path_factory, search_models: SearchModels, default_user: KhojUser):
    """Prepare content configuration and indexed sample data for ``default_user``.

    Steps performed:
        1. Build a ``ContentConfig`` whose image section points at
           ``tests/data/images`` and stores embeddings in a temporary directory.
        2. Run image search setup against that configuration.
        3. Create a ``LocalOrgConfig`` row and set up text search over the
           in-file ``org`` sample data.
        4. When ``GITHUB_PAT_TOKEN`` is set in the environment, create a
           ``GithubConfig`` and a ``GithubRepoConfig`` for ``khoj-ai/lantern``.
        5. Create a ``LocalPlaintextConfig`` row.

    ``pytest.mark.django_db`` is intentionally not applied: pytest ignores
    marks placed on fixture functions (and deprecates doing so). Database
    access is requested through the autouse ``enable_db_access_for_all_tests``
    fixture in this file.

    Returns:
        The ``ContentConfig`` built in step 1.
    """
    content_dir = tmp_path_factory.mktemp("content")

    # Generate Image Embeddings from Test Images
    content_config = ContentConfig()
    content_config.image = ImageContentConfig(
        input_filter=None,
        input_directories=["tests/data/images"],
        embeddings_file=content_dir.joinpath("image_embeddings.pt"),
        batch_size=1,
        use_xmp_metadata=False,
    )

    image_search.setup(content_config.image, search_models.image_search.image_encoder, regenerate=False)

    LocalOrgConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/org/*.org"],
        index_heading_entries=False,
        user=default_user,
    )

    text_search.setup(OrgToEntries, get_sample_data("org"), regenerate=False, user=default_user)

    # Read the token once and reuse it for both the check and the stored value.
    github_pat_token = os.getenv("GITHUB_PAT_TOKEN")
    if github_pat_token:
        # Keep the created row so the repo config can reference it directly
        # instead of looking it up again by user.
        github_config = GithubConfig.objects.create(
            pat_token=github_pat_token,
            user=default_user,
        )

        GithubRepoConfig.objects.create(
            owner="khoj-ai",
            name="lantern",
            branch="master",
            github_config=github_config,
        )

    LocalPlaintextConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/plaintext/*.txt", "tests/data/plaintext/*.md", "tests/data/plaintext/*.html"],
        user=default_user,
    )

    return content_config
2023-01-09 20:17:36 +01:00
2023-03-01 02:26:06 +01:00
@pytest.fixture(scope="session")
def md_content_config():
    """Create and return a ``LocalMarkdownConfig`` for the markdown test data.

    The row matches ``tests/data/markdown/*.markdown`` through its input
    filter and lists no explicit input files.
    """
    # NOTE(review): no ``user`` is passed here, unlike the other Local*Config
    # rows created in this file, and the fixture is session-scoped while it
    # touches the database -- confirm both are intended.
    return LocalMarkdownConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/markdown/*.markdown"],
    )
2023-03-15 21:26:19 +01:00
2023-10-26 20:37:41 +02:00
@pytest.fixture(scope="function")
def chat_client(search_config: SearchConfig, default_user2: KhojUser):
    """Return a ``TestClient`` for chat tests with markdown content indexed.

    Shared ``state`` is mutated: the search config and search types are
    installed, ``state.content_index`` is replaced with the first element
    returned by ``configure_content`` and anonymous mode is switched on.
    When ``OPENAI_API_KEY`` is set, OpenAI chat model options and the
    conversation configs for ``default_user2`` are created via the factories.
    """
    # Initialize app state
    state.config.search_type = search_config
    state.SearchType = configure_search_types()

    LocalMarkdownConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/markdown/*.markdown"],
        user=default_user2,
    )

    # Index Markdown Content for Search
    files_to_index = fs_syncer.collect_files(user=default_user2)
    indexed_content, _ = configure_content(
        state.content_index,
        state.config.content_type,
        files_to_index,
        state.search_models,
        user=default_user2,
    )
    state.content_index = indexed_content

    # Initialize Processor from Config
    if os.getenv("OPENAI_API_KEY"):
        openai_model = ChatModelOptionsFactory(chat_model="gpt-3.5-turbo", model_type="openai")
        OpenAIProcessorConversationConfigFactory()
        UserConversationProcessorConfigFactory(user=default_user2, setting=openai_model)

    state.anonymous_mode = True

    # Assemble the application under test.
    test_app = FastAPI()
    configure_routes(test_app)
    configure_middleware(test_app)
    test_app.mount("/static", StaticFiles(directory=web_directory), name="static")
    return TestClient(test_app)
2023-08-01 05:24:52 +02:00
2023-10-26 20:37:41 +02:00
@pytest.fixture(scope="function")
def chat_client_no_background(search_config: SearchConfig, default_user2: KhojUser):
    """Return a ``TestClient`` for chat tests without indexing any content.

    Shared ``state`` is mutated: the search config and search types are
    installed and anonymous mode is switched on. When ``OPENAI_API_KEY`` is
    set, OpenAI chat model options and the conversation configs for
    ``default_user2`` are created via the factories.
    """
    # Initialize app state
    state.config.search_type = search_config
    state.SearchType = configure_search_types()

    # Initialize Processor from Config
    if os.getenv("OPENAI_API_KEY"):
        openai_model = ChatModelOptionsFactory(chat_model="gpt-3.5-turbo", model_type="openai")
        OpenAIProcessorConversationConfigFactory()
        UserConversationProcessorConfigFactory(user=default_user2, setting=openai_model)

    state.anonymous_mode = True

    # Assemble the application under test.
    test_app = FastAPI()
    configure_routes(test_app)
    configure_middleware(test_app)
    test_app.mount("/static", StaticFiles(directory=web_directory), name="static")
    return TestClient(test_app)
@pytest.fixture(scope="function")
def fastapi_app():
    """Return a bare ``FastAPI`` app with routes, middleware and static files.

    Unlike the client fixtures in this file, the app is returned directly
    rather than wrapped in a ``TestClient``.
    """
    application = FastAPI()
    configure_routes(application)
    configure_middleware(application)

    static_files = StaticFiles(directory=web_directory)
    application.mount("/static", static_files, name="static")
    return application
@pytest.fixture(scope="function")
def client(
    content_config: ContentConfig,
    search_config: SearchConfig,
    api_user: KhojApiUser,
):
    """Return a ``TestClient`` with search state prepared for ``api_user``.

    Shared ``state`` is mutated: content and search configs, search types,
    embeddings and cross-encoder models, the image search model and the image
    content index are all (re)assigned, and anonymous mode is switched off.
    Text search is set up for the in-file ``org`` and ``plaintext`` samples on
    behalf of ``api_user.user``.
    """
    state.config.content_type = content_config
    state.config.search_type = search_config
    state.SearchType = configure_search_types()
    state.embeddings_model = EmbeddingsModel()
    state.cross_encoder_model = CrossEncoderModel()

    # These lines help us Mock the Search models for these search types
    state.search_models.image_search = image_search.initialize_model(search_config.image)

    org_samples = get_sample_data("org")
    text_search.setup(OrgToEntries, org_samples, regenerate=False, user=api_user.user)

    state.content_index.image = image_search.setup(
        content_config.image,
        state.search_models.image_search,
        regenerate=False,
    )

    plaintext_samples = get_sample_data("plaintext")
    text_search.setup(PlaintextToEntries, plaintext_samples, regenerate=False, user=api_user.user)

    state.anonymous_mode = False

    # Assemble the application under test.
    test_app = FastAPI()
    configure_routes(test_app)
    configure_middleware(test_app)
    test_app.mount("/static", StaticFiles(directory=web_directory), name="static")
    return TestClient(test_app)
@pytest.fixture(scope="function")
def client_offline_chat(search_config: SearchConfig, default_user2: KhojUser):
    """Return a ``TestClient`` configured for offline chat with markdown indexed.

    Shared ``state`` is mutated: the search config and search types are
    installed, ``state.content_index`` is replaced with the first element
    returned by ``configure_content`` and anonymous mode is switched on.
    An enabled offline chat processor config and a conversation config for
    ``default_user2`` are created via the factories.
    """
    # Initialize app state
    state.config.search_type = search_config
    state.SearchType = configure_search_types()

    LocalMarkdownConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/markdown/*.markdown"],
        user=default_user2,
    )

    # Index Markdown Content for Search.
    # Keep the returned index, as the ``chat_client`` fixture does; the result
    # was previously discarded, leaving ``state.content_index`` unchanged.
    # NOTE(review): assumes ``configure_content`` returns an (index, flag)
    # pair, as its use in ``chat_client`` implies -- confirm against the callee.
    all_files = fs_syncer.collect_files(user=default_user2)
    state.content_index, _ = configure_content(
        state.content_index, state.config.content_type, all_files, state.search_models, user=default_user2
    )

    # Initialize Processor from Config
    OfflineChatProcessorConversationConfigFactory(enabled=True)
    UserConversationProcessorConfigFactory(user=default_user2)

    state.anonymous_mode = True

    app = FastAPI()
    configure_routes(app)
    configure_middleware(app)
    app.mount("/static", StaticFiles(directory=web_directory), name="static")
    return TestClient(app)
2023-02-17 17:04:26 +01:00
@pytest.fixture(scope="function")
def new_org_file(default_user: KhojUser, content_config: ContentConfig):
    """Yield the path of a freshly created, empty ``new_file.org``.

    The file is placed in the parent directory of the first input filter of
    ``default_user``'s first ``LocalOrgConfig`` and is deleted again after the
    test, if it still exists.
    """
    # Setup
    user_org_config = LocalOrgConfig.objects.filter(user=default_user).first()
    first_filter = user_org_config.input_filter[0]
    org_path = Path(first_filter).parent / "new_file.org"
    org_path.touch()

    yield org_path

    # Cleanup
    if not org_path.exists():
        return
    org_path.unlink()
2023-02-17 17:04:26 +01:00
@pytest.fixture(scope="function")
def org_config_with_only_new_file(new_org_file: Path, default_user: KhojUser):
    """Point ``default_user``'s org config at ``new_org_file`` only.

    The input filter is cleared and the input files are replaced with the
    single new file. The update is restricted to ``default_user``'s rows so
    org configs belonging to other users are left untouched (previously every
    ``LocalOrgConfig`` row was rewritten).

    Returns:
        The first ``LocalOrgConfig`` of ``default_user`` after the update, or
        ``None`` if that user has no org config.
    """
    user_org_configs = LocalOrgConfig.objects.filter(user=default_user)
    user_org_configs.update(input_files=[str(new_org_file)], input_filter=None)
    return user_org_configs.first()
2023-08-31 21:55:17 +02:00
@pytest.fixture(scope="function")
def sample_org_data():
    """Expose the in-file ``org`` sample documents as a fixture."""
    org_samples = get_sample_data("org")
    return org_samples
# Return the in-memory sample documents for one content type.
#
# `type` selects one of the top-level groups of `sample_data` (org, markdown,
# plaintext). Each group maps a file name to that file's text. The lookup on
# the last line is a plain subscript, so an unknown `type` raises KeyError.
#
# NOTE(review): the bare timestamp lines and the space-padded literals inside
# this block look like blame/extraction residue rather than source text; they
# are left untouched here -- confirm against the upstream file before relying
# on the exact sample contents.
def get_sample_data ( type ) :
# All sample documents, grouped by content type.
sample_data = {
" org " : {
2023-11-16 11:47:58 +01:00
" elisp.org " : """
* Emacs Khoj
/ An Emacs interface for [ [ https : / / github . com / khoj - ai / khoj ] [ khoj ] ] /
* * Requirements
- Install and Run [ [ https : / / github . com / khoj - ai / khoj ] [ khoj ] ]
* * Installation
* * * Direct
- Put ~ khoj . el ~ in your Emacs load path . For e . g ~ / . emacs . d / lisp
- Load via ~ use - package ~ in your ~ / . emacs . d / init . el or . emacs file by adding below snippet
#+begin_src elisp
; ; Khoj Package
( use - package khoj
: load - path " ~/.emacs.d/lisp/khoj.el "
: bind ( " C-c s " . ' khoj))
#+end_src
* * * Using [ [ https : / / github . com / quelpa / quelpa #installation][Quelpa]]
- Ensure [ [ https : / / github . com / quelpa / quelpa #installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed
- Add below snippet to your ~ / . emacs . d / init . el or . emacs config file and execute it .
#+begin_src elisp
; ; Khoj Package
( use - package khoj
: quelpa ( khoj : fetcher url : url " https://raw.githubusercontent.com/khoj-ai/khoj/master/interface/emacs/khoj.el " )
: bind ( " C-c s " . ' khoj))
#+end_src
* * Usage
1. Call ~ khoj ~ using keybinding ~ C - c s ~ or ~ M - x khoj ~
2. Enter Query in Natural Language
e . g " What is the meaning of life? " " What are my life goals? "
3. Wait for results
* Note : It takes about 15 s on a Mac M1 and a ~ 100 K lines corpus of org - mode files *
4. ( Optional ) Narrow down results further
Include / Exclude specific words from results by adding to query
2023-11-17 23:49:39 +01:00
e . g " What is the meaning of life? -god +none "
2023-11-16 11:47:58 +01:00
""" ,
2023-08-31 21:55:17 +02:00
" readme.org " : """
* Khoj
/ Allow natural language search on user content like notes , images using transformer based models /
All data is processed locally . User can interface with khoj app via [ [ . / interface / emacs / khoj . el ] [ Emacs ] ] , API or Commandline
* * Dependencies
- Python3
- [ [ https : / / docs . conda . io / en / latest / miniconda . html #latest-miniconda-installer-links][Miniconda]]
* * Install
#+begin_src shell
git clone https : / / github . com / khoj - ai / khoj & & cd khoj
conda env create - f environment . yml
conda activate khoj
2023-11-16 11:47:58 +01:00
#+end_src""",
2023-08-31 21:55:17 +02:00
} ,
# Markdown rendition of the readme sample.
" markdown " : {
" readme.markdown " : """
# Khoj
Allow natural language search on user content like notes , images using transformer based models
All data is processed locally . User can interface with khoj app via [ Emacs ] ( . / interface / emacs / khoj . el ) , API or Commandline
## Dependencies
- Python3
- [ Miniconda ] ( https : / / docs . conda . io / en / latest / miniconda . html #latest-miniconda-installer-links)
## Install
` ` ` shell
git clone
conda env create - f environment . yml
conda activate khoj
` ` `
"""
} ,
# Plain-text rendition of the readme sample.
" plaintext " : {
" readme.txt " : """
Khoj
Allow natural language search on user content like notes , images using transformer based models
All data is processed locally . User can interface with khoj app via Emacs , API or Commandline
Dependencies
- Python3
- Miniconda
Install
git clone
conda env create - f environment . yml
conda activate khoj
"""
} ,
}
# Hand back only the group requested by the caller.
return sample_data [ type ]