2023-03-15 20:26:19 +00:00
import os
2023-01-09 19:17:36 +00:00
from pathlib import Path
2023-11-21 20:30:43 +00:00
2021-10-03 02:46:29 +00:00
import pytest
2023-10-26 16:42:29 +00:00
from fastapi import FastAPI
2023-11-21 20:30:43 +00:00
from fastapi . staticfiles import StaticFiles
from fastapi . testclient import TestClient
2023-11-15 00:56:26 +00:00
2023-12-28 12:34:02 +00:00
from khoj . configure import (
configure_middleware ,
configure_routes ,
configure_search_types ,
)
2023-11-21 18:56:04 +00:00
from khoj . database . models import (
2024-03-23 16:39:38 +00:00
Agent ,
2023-11-21 20:30:43 +00:00
GithubConfig ,
GithubRepoConfig ,
2023-10-26 19:33:03 +00:00
KhojApiUser ,
2023-11-21 20:30:43 +00:00
KhojUser ,
2023-10-26 16:42:29 +00:00
LocalMarkdownConfig ,
2023-11-21 20:30:43 +00:00
LocalOrgConfig ,
2023-10-26 16:42:29 +00:00
LocalPlaintextConfig ,
)
2023-11-22 06:11:32 +00:00
from khoj . processor . content . org_mode . org_to_entries import OrgToEntries
from khoj . processor . content . plaintext . plaintext_to_entries import PlaintextToEntries
2023-11-21 20:30:43 +00:00
from khoj . processor . embeddings import CrossEncoderModel , EmbeddingsModel
from khoj . routers . indexer import configure_content
2024-04-05 06:40:03 +00:00
from khoj . search_type import text_search
2023-11-21 20:30:43 +00:00
from khoj . utils import fs_syncer , state
2023-07-22 07:28:14 +00:00
from khoj . utils . config import SearchModels
2023-10-15 02:39:13 +00:00
from khoj . utils . constants import web_directory
2023-02-14 20:50:51 +00:00
from khoj . utils . helpers import resolve_absolute_path
2024-04-17 07:30:50 +00:00
from khoj . utils . rawconfig import ContentConfig , ImageSearchConfig , SearchConfig
2023-10-26 18:37:41 +00:00
from tests . helpers import (
2023-11-02 17:43:27 +00:00
ChatModelOptionsFactory ,
2023-11-21 20:30:43 +00:00
OpenAIProcessorConversationConfigFactory ,
2024-04-17 07:52:41 +00:00
ProcessLockFactory ,
2023-11-11 06:38:28 +00:00
SubscriptionFactory ,
2023-11-21 20:30:43 +00:00
UserConversationProcessorConfigFactory ,
UserFactory ,
2023-10-26 18:37:41 +00:00
)
2023-10-26 16:42:29 +00:00
@pytest.fixture(autouse=True)
def enable_db_access_for_all_tests(db):
    """Request the ``db`` fixture on behalf of every test (autouse).

    The fixture has no body of its own; depending on ``db`` is its whole job.
    """
2023-02-17 16:04:26 +00:00
@pytest.fixture(scope="session")
def search_config() -> SearchConfig:
    """Build the session-wide search configuration.

    Side effects: registers a "default" embeddings model and a "default"
    cross-encoder model on the shared ``state`` module, and creates the
    ``~/.khoj/search`` model directory if it does not exist yet.

    Returns:
        A ``SearchConfig`` whose ``image`` section is populated.
    """
    # Register the default models on the shared application state.
    state.embeddings_model = {}
    state.embeddings_model["default"] = EmbeddingsModel()
    state.cross_encoder_model = {}
    state.cross_encoder_model["default"] = CrossEncoderModel()

    # Make sure the directory the image model is stored under exists.
    model_dir = resolve_absolute_path("~/.khoj/search")
    model_dir.mkdir(parents=True, exist_ok=True)

    config = SearchConfig()
    config.image = ImageSearchConfig(
        encoder="sentence-transformers/clip-ViT-B-32",
        model_directory=model_dir / "image/",
        encoder_type=None,
    )
    return config
2023-10-26 16:42:29 +00:00
@pytest.fixture
def default_user():
    """Create a factory-built user and attach a subscription to it.

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.

    Returns:
        The newly created user.
    """
    user = UserFactory()
    SubscriptionFactory(user=user)
    return user
2023-10-26 16:42:29 +00:00
2023-10-26 17:17:29 +00:00
@pytest.fixture
def default_user2():
    """Return the user named "default", creating it with a subscription if absent.

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.
    """
    matching_users = KhojUser.objects.filter(username="default")
    if matching_users.exists():
        # Reuse the existing row; no new subscription is created in this case.
        return matching_users.get()

    user = KhojUser.objects.create(
        username="default",
        email="default@example.com",
        password="default",
    )
    SubscriptionFactory(user=user)
    return user
2023-10-26 17:17:29 +00:00
2023-11-10 22:00:58 +00:00
@pytest.fixture
def default_user3():
    """Return the user named "default3", creating it with a subscription if absent.

    This user should not have any data associated with it.

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.
    """
    matching_users = KhojUser.objects.filter(username="default3")
    if matching_users.exists():
        # Reuse the existing row; no new subscription is created in this case.
        return matching_users.get()

    user = KhojUser.objects.create(
        username="default3",
        email="default3@example.com",
        password="default3",
    )
    SubscriptionFactory(user=user)
    return user
2023-11-10 22:00:58 +00:00
2023-11-26 21:09:00 +00:00
@pytest.fixture
def default_user4():
    """Return the user named "default4", creating it if absent.

    This user should not have a valid subscription: when the user is created
    here, its subscription is built with ``renewal_date=None``.

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.
    """
    matching_users = KhojUser.objects.filter(username="default4")
    if matching_users.exists():
        # Reuse the existing row; no new subscription is created in this case.
        return matching_users.get()

    user = KhojUser.objects.create(
        username="default4",
        email="default4@example.com",
        password="default4",
    )
    SubscriptionFactory(user=user, renewal_date=None)
    return user
2023-10-26 19:33:03 +00:00
@pytest.fixture
def api_user(default_user):
    """Return the API user bound to ``default_user``, creating it if absent.

    A newly created API user is named "api-key" and carries the token
    "kk-secret".

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.
    """
    existing_api_users = KhojApiUser.objects.filter(user=default_user)
    if existing_api_users.exists():
        return existing_api_users.get()

    return KhojApiUser.objects.create(
        user=default_user,
        name="api-key",
        token="kk-secret",
    )
2023-11-04 21:29:30 +00:00
@pytest.fixture
def api_user2(default_user2):
    """Return the API user bound to ``default_user2``, creating it if absent.

    A newly created API user is named "api-key" and carries the token
    "kk-diff-secret".

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.
    """
    existing_api_users = KhojApiUser.objects.filter(user=default_user2)
    if existing_api_users.exists():
        return existing_api_users.get()

    return KhojApiUser.objects.create(
        user=default_user2,
        name="api-key",
        token="kk-diff-secret",
    )
2023-11-10 22:00:58 +00:00
@pytest.fixture
def api_user3(default_user3):
    """Return the API user bound to ``default_user3``, creating it if absent.

    A newly created API user is named "api-key" and carries the token
    "kk-diff-secret-3".

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.
    """
    existing_api_users = KhojApiUser.objects.filter(user=default_user3)
    if existing_api_users.exists():
        return existing_api_users.get()

    return KhojApiUser.objects.create(
        user=default_user3,
        name="api-key",
        token="kk-diff-secret-3",
    )
2023-11-26 21:09:00 +00:00
@pytest.fixture
def api_user4(default_user4):
    """Return the API user bound to ``default_user4``, creating it if absent.

    A newly created API user is named "api-key" and carries the token
    "kk-diff-secret-4".

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.
    """
    existing_api_users = KhojApiUser.objects.filter(user=default_user4)
    if existing_api_users.exists():
        return existing_api_users.get()

    return KhojApiUser.objects.create(
        user=default_user4,
        name="api-key",
        token="kk-diff-secret-4",
    )
2024-03-23 16:39:38 +00:00
@pytest.fixture
def offline_agent():
    """Create an "Accountant" agent backed by factory-default chat model options.

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.

    Returns:
        The newly created ``Agent``.
    """
    accountant_model = ChatModelOptionsFactory()
    return Agent.objects.create(
        name="Accountant",
        chat_model=accountant_model,
        personality="You are a certified CPA. You are able to tell me how much I've spent based on my notes. Regardless of what I ask, you should always respond with the total amount I've spent. ALWAYS RESPOND WITH A SUMMARY TOTAL OF HOW MUCH MONEY I HAVE SPENT.",
    )
@pytest.fixture
def openai_agent():
    """Create an "Accountant" agent backed by "gpt-3.5-turbo" OpenAI chat model options.

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.

    Returns:
        The newly created ``Agent``.
    """
    accountant_model = ChatModelOptionsFactory(chat_model="gpt-3.5-turbo", model_type="openai")
    return Agent.objects.create(
        name="Accountant",
        chat_model=accountant_model,
        personality="You are a certified CPA. You are able to tell me how much I've spent based on my notes. Regardless of what I ask, you should always respond with the total amount I've spent.",
    )
2023-02-17 16:04:26 +00:00
@pytest.fixture(scope="session")
def search_models(search_config: SearchConfig):
    """Return a fresh, empty ``SearchModels`` container for the session.

    ``search_config`` is not read here; requesting it makes that fixture run
    before this one.
    """
    return SearchModels()
2024-04-17 07:52:41 +00:00
@pytest.fixture
def default_process_lock():
    """Return a process lock built by ``ProcessLockFactory`` with its defaults.

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.
    """
    return ProcessLockFactory()
2023-10-26 16:42:29 +00:00
@pytest.fixture
def anyio_backend() -> str:
    """Name the async backend used by tests that request this fixture."""
    backend_name = "asyncio"
    return backend_name
@pytest.fixture(scope="function")
def content_config(tmp_path_factory, search_models: SearchModels, default_user: KhojUser):
    """Register sample content sources for ``default_user`` and index the org samples.

    Creates an org-mode config and a plaintext config for the user, runs the
    text-search setup over the in-memory org sample data, and, only when the
    ``GITHUB_PAT_TOKEN`` environment variable is set, also creates a GitHub
    config plus a repo config for ``khoj-ai/lantern``.

    The ``django_db`` mark is intentionally not applied: pytest ignores marks
    placed on fixture functions and deprecates the practice. Database access
    is granted by the autouse ``enable_db_access_for_all_tests`` fixture.

    Returns:
        An empty ``ContentConfig``; the actual sources live in the database.
    """
    # The returned path is not used below; the call is kept so the temporary
    # "content" directory is still created exactly as before.
    tmp_path_factory.mktemp("content")

    config = ContentConfig()

    # Org-mode source, then index the bundled org sample data for the user.
    LocalOrgConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/org/*.org"],
        index_heading_entries=False,
        user=default_user,
    )
    text_search.setup(OrgToEntries, get_sample_data("org"), regenerate=False, user=default_user)

    # GitHub source is optional: it needs a personal access token from the environment.
    github_pat_token = os.getenv("GITHUB_PAT_TOKEN")
    if github_pat_token:
        GithubConfig.objects.create(
            pat_token=github_pat_token,
            user=default_user,
        )
        GithubRepoConfig.objects.create(
            owner="khoj-ai",
            name="lantern",
            branch="master",
            github_config=GithubConfig.objects.get(user=default_user),
        )

    # Plaintext source is always registered.
    LocalPlaintextConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/plaintext/*.txt", "tests/data/plaintext/*.md", "tests/data/plaintext/*.html"],
        user=default_user,
    )

    return config
2023-01-09 19:17:36 +00:00
2023-03-01 01:26:06 +00:00
@pytest.fixture(scope="session")
def md_content_config():
    """Create and return a markdown content config covering the markdown test data.

    NOTE(review): this fixture is session-scoped yet writes to the database,
    and it sets no ``user`` unlike the other config fixtures in this file -
    confirm it is still used and works as intended.
    """
    return LocalMarkdownConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/markdown/*.markdown"],
    )
2023-03-15 20:26:19 +00:00
2023-10-26 18:37:41 +00:00
@pytest.fixture(scope="function")
def chat_client(search_config: SearchConfig, default_user2: KhojUser):
    """Test client for ``default_user2`` with content indexed and auth not required."""
    return chat_client_builder(
        search_config=search_config,
        user=default_user2,
        require_auth=False,
    )
2023-03-15 20:26:19 +00:00
2023-08-01 03:24:52 +00:00
2024-01-11 16:51:57 +00:00
@pytest.fixture(scope="function")
def chat_client_with_auth(search_config: SearchConfig, default_user2: KhojUser):
    """Test client for ``default_user2`` with content indexed and auth required."""
    return chat_client_builder(
        search_config=search_config,
        user=default_user2,
        require_auth=True,
    )
2023-08-01 03:24:52 +00:00
2023-10-26 18:37:41 +00:00
@pytest.fixture(scope="function")
def chat_client_no_background(search_config: SearchConfig, default_user2: KhojUser):
    """Test client for ``default_user2`` with no content indexed and auth not required."""
    return chat_client_builder(
        search_config=search_config,
        user=default_user2,
        index_content=False,
        require_auth=False,
    )
def chat_client_builder(search_config, user, index_content=True, require_auth=False):
    """Build a FastAPI ``TestClient`` configured for chat tests.

    This is a plain helper called by the chat client fixtures, so it carries
    no ``django_db`` mark: pytest only honours marks on collected tests.

    Args:
        search_config: Search configuration stored on the shared app state.
        user: User that owns the indexed content and the chat settings.
        index_content: When true, register the markdown test data for ``user``
            and index it before building the client.
        require_auth: When false, the app state is put in anonymous mode.

    Returns:
        A ``TestClient`` wrapping a freshly configured FastAPI app.
    """
    # Initialize app state
    state.config.search_type = search_config
    state.SearchType = configure_search_types()

    if index_content:
        LocalMarkdownConfig.objects.create(
            input_files=None,
            input_filter=["tests/data/markdown/*.markdown"],
            user=user,
        )

        # Index Markdown Content for Search
        all_files = fs_syncer.collect_files(user=user)
        configure_content(all_files, user=user)

    # Initialize Processor from Config; only possible when an OpenAI key is available.
    if os.getenv("OPENAI_API_KEY"):
        chat_model = ChatModelOptionsFactory(chat_model="gpt-3.5-turbo", model_type="openai")
        # NOTE(review): the attribute is assigned without an explicit save() - confirm
        # the factory/model persists it where the tests need it.
        chat_model.openai_config = OpenAIProcessorConversationConfigFactory()
        UserConversationProcessorConfigFactory(user=user, setting=chat_model)

    state.anonymous_mode = not require_auth

    app = FastAPI()
    configure_routes(app)
    configure_middleware(app)
    app.mount("/static", StaticFiles(directory=web_directory), name="static")
    return TestClient(app)
@pytest.fixture(scope="function")
def fastapi_app():
    """Return a FastAPI app with routes, middleware and the static mount configured."""
    application = FastAPI()
    configure_routes(application)
    configure_middleware(application)

    static_files = StaticFiles(directory=web_directory)
    application.mount("/static", static_files, name="static")
    return application
@pytest.fixture(scope="function")
def client(
    content_config: ContentConfig,
    search_config: SearchConfig,
    api_user: KhojApiUser,
):
    """Build an authenticated-mode ``TestClient`` with org and plaintext samples indexed.

    Stores the content and search configs on the shared app state, registers
    the "default" embeddings and cross-encoder models, indexes the in-memory
    org and plaintext sample data for the API user's account, and turns
    anonymous mode off before creating the app.
    """
    state.config.content_type = content_config
    state.config.search_type = search_config
    state.SearchType = configure_search_types()

    state.embeddings_model = {}
    state.embeddings_model["default"] = EmbeddingsModel()
    state.cross_encoder_model = {}
    state.cross_encoder_model["default"] = CrossEncoderModel()

    # These calls help us mock the search models for these search types
    sample_sources = (
        (OrgToEntries, "org"),
        (PlaintextToEntries, "plaintext"),
    )
    for to_entries, sample_kind in sample_sources:
        text_search.setup(
            to_entries,
            get_sample_data(sample_kind),
            regenerate=False,
            user=api_user.user,
        )

    state.anonymous_mode = False

    application = FastAPI()
    configure_routes(application)
    configure_middleware(application)
    application.mount("/static", StaticFiles(directory=web_directory), name="static")
    return TestClient(application)
@pytest.fixture(scope="function")
def client_offline_chat(search_config: SearchConfig, default_user2: KhojUser):
    """Build an anonymous-mode ``TestClient`` set up for offline chat tests.

    Registers and indexes the markdown test data for ``default_user2``,
    creates offline chat model options and a conversation config for the
    user, and enables anonymous mode before creating the app.
    """
    # Initialize app state
    state.config.search_type = search_config
    state.SearchType = configure_search_types()

    LocalMarkdownConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/markdown/*.markdown"],
        user=default_user2,
    )
    collected_files = fs_syncer.collect_files(user=default_user2)
    configure_content(collected_files, user=default_user2)

    # Initialize Processor from Config
    offline_model_options = {
        "chat_model": "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
        "tokenizer": None,
        "max_prompt_size": None,
        "model_type": "offline",
    }
    ChatModelOptionsFactory(**offline_model_options)
    # NOTE(review): the options created above are not passed as ``setting`` here,
    # unlike in ``chat_client_builder`` - confirm the factory default is intended.
    UserConversationProcessorConfigFactory(user=default_user2)

    state.anonymous_mode = True

    application = FastAPI()
    configure_routes(application)
    configure_middleware(application)
    application.mount("/static", StaticFiles(directory=web_directory), name="static")
    return TestClient(application)
2023-02-17 16:04:26 +00:00
@pytest.fixture(scope="function")
def new_org_file(default_user: KhojUser, content_config: ContentConfig):
    """Yield the path of an empty ``new_file.org`` and delete it after the test.

    The file is placed in the parent directory of the first input filter of
    ``default_user``'s org config. ``content_config`` is requested so that
    config exists before this fixture runs.
    """
    # Setup
    first_filter = LocalOrgConfig.objects.filter(user=default_user).first().input_filter[0]
    org_file = Path(first_filter).parent / "new_file.org"
    org_file.touch()

    yield org_file

    # Cleanup
    if not org_file.exists():
        return
    org_file.unlink()
2023-02-17 16:04:26 +00:00
@pytest.fixture(scope="function")
def org_config_with_only_new_file(new_org_file: Path, default_user: KhojUser):
    """Point ``default_user``'s org config at ``new_org_file`` only and return it.

    The update is scoped to ``default_user``. Previously it was issued on the
    whole ``LocalOrgConfig`` table, which also rewrote any other user's org
    config even though only the default user's config is returned.

    Returns:
        The default user's first ``LocalOrgConfig`` after the update, or
        ``None`` if the user has no org config.
    """
    user_org_configs = LocalOrgConfig.objects.filter(user=default_user)
    user_org_configs.update(input_files=[str(new_org_file)], input_filter=None)
    # The queryset is lazy, so this re-reads the row after the update.
    return user_org_configs.first()
2023-08-31 19:55:17 +00:00
@pytest.fixture(scope="function")
def sample_org_data():
    """Return the in-memory org-mode sample documents, keyed by file name."""
    org_samples = get_sample_data("org")
    return org_samples
# Return the built-in sample documents for one content type.
#
# `type` selects a top-level key of the table below (org, markdown or
# plaintext). The result is a dict mapping a sample file name to its text.
# An unknown key propagates the KeyError raised by the final dict lookup.
# NOTE(review): the parameter name shadows the builtin `type`; it is left
# unchanged because callers may pass it by keyword.
def get_sample_data ( type ) :
sample_data = {
" org " : {
2023-11-16 10:47:58 +00:00
" elisp.org " : """
* Emacs Khoj
/ An Emacs interface for [ [ https : / / github . com / khoj - ai / khoj ] [ khoj ] ] /
* * Requirements
- Install and Run [ [ https : / / github . com / khoj - ai / khoj ] [ khoj ] ]
* * Installation
* * * Direct
- Put ~ khoj . el ~ in your Emacs load path . For e . g ~ / . emacs . d / lisp
- Load via ~ use - package ~ in your ~ / . emacs . d / init . el or . emacs file by adding below snippet
#+begin_src elisp
; ; Khoj Package
( use - package khoj
: load - path " ~/.emacs.d/lisp/khoj.el "
: bind ( " C-c s " . ' khoj))
#+end_src
* * * Using [ [ https : / / github . com / quelpa / quelpa #installation][Quelpa]]
- Ensure [ [ https : / / github . com / quelpa / quelpa #installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed
- Add below snippet to your ~ / . emacs . d / init . el or . emacs config file and execute it .
#+begin_src elisp
; ; Khoj Package
( use - package khoj
: quelpa ( khoj : fetcher url : url " https://raw.githubusercontent.com/khoj-ai/khoj/master/interface/emacs/khoj.el " )
: bind ( " C-c s " . ' khoj))
#+end_src
* * Usage
1. Call ~ khoj ~ using keybinding ~ C - c s ~ or ~ M - x khoj ~
2. Enter Query in Natural Language
e . g " What is the meaning of life? " " What are my life goals? "
3. Wait for results
* Note : It takes about 15 s on a Mac M1 and a ~ 100 K lines corpus of org - mode files *
4. ( Optional ) Narrow down results further
Include / Exclude specific words from results by adding to query
2023-11-17 22:49:39 +00:00
e . g " What is the meaning of life? -god +none "
2023-11-16 10:47:58 +00:00
""" ,
2023-08-31 19:55:17 +00:00
" readme.org " : """
* Khoj
/ Allow natural language search on user content like notes , images using transformer based models /
All data is processed locally . User can interface with khoj app via [ [ . / interface / emacs / khoj . el ] [ Emacs ] ] , API or Commandline
* * Dependencies
- Python3
- [ [ https : / / docs . conda . io / en / latest / miniconda . html #latest-miniconda-installer-links][Miniconda]]
* * Install
#+begin_src shell
git clone https : / / github . com / khoj - ai / khoj & & cd khoj
conda env create - f environment . yml
conda activate khoj
2023-11-16 10:47:58 +00:00
#+end_src""",
2023-08-31 19:55:17 +00:00
} ,
" markdown " : {
" readme.markdown " : """
# Khoj
Allow natural language search on user content like notes , images using transformer based models
All data is processed locally . User can interface with khoj app via [ Emacs ] ( . / interface / emacs / khoj . el ) , API or Commandline
## Dependencies
- Python3
- [ Miniconda ] ( https : / / docs . conda . io / en / latest / miniconda . html #latest-miniconda-installer-links)
## Install
` ` ` shell
git clone
conda env create - f environment . yml
conda activate khoj
` ` `
"""
} ,
" plaintext " : {
" readme.txt " : """
Khoj
Allow natural language search on user content like notes , images using transformer based models
All data is processed locally . User can interface with khoj app via Emacs , API or Commandline
Dependencies
- Python3
- Miniconda
Install
git clone
conda env create - f environment . yml
conda activate khoj
"""
} ,
}
# Hand back only the documents registered for the requested content type.
return sample_data [ type ]