2023-03-15 21:26:19 +01:00
import os
2023-01-09 20:17:36 +01:00
from pathlib import Path
2023-11-21 21:30:43 +01:00
2021-10-03 04:46:29 +02:00
import pytest
2023-10-26 18:42:29 +02:00
from fastapi import FastAPI
2023-11-21 21:30:43 +01:00
from fastapi . staticfiles import StaticFiles
from fastapi . testclient import TestClient
2023-11-15 01:56:26 +01:00
2023-12-28 13:34:02 +01:00
from khoj . configure import (
configure_middleware ,
configure_routes ,
configure_search_types ,
)
2023-11-21 19:56:04 +01:00
from khoj . database . models import (
2024-03-23 17:39:38 +01:00
Agent ,
2023-11-21 21:30:43 +01:00
GithubConfig ,
GithubRepoConfig ,
2023-10-26 21:33:03 +02:00
KhojApiUser ,
2023-11-21 21:30:43 +01:00
KhojUser ,
2023-10-26 18:42:29 +02:00
LocalMarkdownConfig ,
2023-11-21 21:30:43 +01:00
LocalOrgConfig ,
2024-06-18 16:01:07 +02:00
LocalPdfConfig ,
2023-10-26 18:42:29 +02:00
LocalPlaintextConfig ,
)
2023-11-22 07:11:32 +01:00
from khoj . processor . content . org_mode . org_to_entries import OrgToEntries
from khoj . processor . content . plaintext . plaintext_to_entries import PlaintextToEntries
2023-11-21 21:30:43 +01:00
from khoj . processor . embeddings import CrossEncoderModel , EmbeddingsModel
2024-07-18 19:00:52 +02:00
from khoj . routers . api_content import configure_content
2024-04-05 08:40:03 +02:00
from khoj . search_type import text_search
2023-11-21 21:30:43 +01:00
from khoj . utils import fs_syncer , state
2023-07-22 09:28:14 +02:00
from khoj . utils . config import SearchModels
2023-10-15 04:39:13 +02:00
from khoj . utils . constants import web_directory
2023-02-14 21:50:51 +01:00
from khoj . utils . helpers import resolve_absolute_path
2024-04-17 09:30:50 +02:00
from khoj . utils . rawconfig import ContentConfig , ImageSearchConfig , SearchConfig
2023-10-26 20:37:41 +02:00
from tests . helpers import (
2023-11-02 18:43:27 +01:00
ChatModelOptionsFactory ,
2023-11-21 21:30:43 +01:00
OpenAIProcessorConversationConfigFactory ,
2024-04-17 09:52:41 +02:00
ProcessLockFactory ,
2023-11-11 07:38:28 +01:00
SubscriptionFactory ,
2023-11-21 21:30:43 +01:00
UserConversationProcessorConfigFactory ,
UserFactory ,
2023-10-26 20:37:41 +02:00
)
2023-10-26 18:42:29 +02:00
@pytest.fixture(autouse=True)
def enable_db_access_for_all_tests(db):
    """Give every test database access.

    The fixture is autouse and does nothing itself; requesting the ``db``
    fixture (supplied by pytest-django) is what enables database access.
    """
2023-02-17 17:04:26 +01:00
@pytest.fixture(scope="session")
def search_config() -> SearchConfig:
    """Build the search configuration shared by the whole test session.

    Side effects:
        - Replaces ``state.embeddings_model`` and ``state.cross_encoder_model``
          with dicts holding a single ``"default"`` model each.
        - Creates the ``~/.khoj/search`` directory if it does not exist.

    Returns:
        A ``SearchConfig`` whose ``image`` section points at the CLIP encoder
        and at the ``image/`` sub-directory of the model directory.
    """
    state.embeddings_model = {"default": EmbeddingsModel()}
    state.cross_encoder_model = {"default": CrossEncoderModel()}

    model_dir = resolve_absolute_path("~/.khoj/search")
    model_dir.mkdir(parents=True, exist_ok=True)

    search_config = SearchConfig()
    search_config.image = ImageSearchConfig(
        encoder="sentence-transformers/clip-ViT-B-32",
        model_directory=model_dir / "image/",
        encoder_type=None,
    )
    return search_config
2023-10-26 18:42:29 +02:00
@pytest.fixture
def default_user():
    """Create a factory-built user and attach a subscription to it.

    Database access comes from the autouse ``enable_db_access_for_all_tests``
    fixture; a ``django_db`` mark on a fixture has no effect, so none is applied.

    Returns:
        The user produced by ``UserFactory``.
    """
    user = UserFactory()
    SubscriptionFactory(user=user)
    return user
2023-10-26 18:42:29 +02:00
2023-10-26 19:17:29 +02:00
@pytest.fixture
def default_user2():
    """Return the user named ``"default"``, creating it on first use.

    A subscription is attached only when the user is newly created; an
    existing user is returned untouched.
    """
    user, created = KhojUser.objects.get_or_create(
        username="default",
        defaults={
            "email": "default@example.com",
            "password": "default",
        },
    )
    if created:
        SubscriptionFactory(user=user)
    return user
2023-10-26 19:17:29 +02:00
2023-11-10 23:00:58 +01:00
@pytest.fixture
def default_user3():
    """
    This user should not have any data associated with it

    Returns the user named ``"default3"``, creating it (with a subscription)
    only when it does not exist yet.
    """
    user, created = KhojUser.objects.get_or_create(
        username="default3",
        defaults={
            "email": "default3@example.com",
            "password": "default3",
        },
    )
    if created:
        SubscriptionFactory(user=user)
    return user
2023-11-10 23:00:58 +01:00
2023-11-26 22:09:00 +01:00
@pytest.fixture
def default_user4():
    """
    This user should not have a valid subscription

    Returns the user named ``"default4"``. When the user is newly created, its
    subscription is built with ``renewal_date=None``.
    """
    user, created = KhojUser.objects.get_or_create(
        username="default4",
        defaults={
            "email": "default4@example.com",
            "password": "default4",
        },
    )
    if created:
        SubscriptionFactory(user=user, renewal_date=None)
    return user
2023-10-26 21:33:03 +02:00
@pytest.fixture
def api_user(default_user):
    """Return the API user for ``default_user``, creating it if absent.

    A newly created API user is named ``"api-key"`` and carries the token
    ``"kk-secret"``.
    """
    api_user, _ = KhojApiUser.objects.get_or_create(
        user=default_user,
        defaults={"name": "api-key", "token": "kk-secret"},
    )
    return api_user
2023-11-04 22:29:30 +01:00
@pytest.fixture
def api_user2(default_user2):
    """Return the API user for ``default_user2``, creating it if absent.

    A newly created API user is named ``"api-key"`` and carries the token
    ``"kk-diff-secret"``.
    """
    api_user, _ = KhojApiUser.objects.get_or_create(
        user=default_user2,
        defaults={"name": "api-key", "token": "kk-diff-secret"},
    )
    return api_user
2023-11-10 23:00:58 +01:00
@pytest.fixture
def api_user3(default_user3):
    """Return the API user for ``default_user3``, creating it if absent.

    A newly created API user is named ``"api-key"`` and carries the token
    ``"kk-diff-secret-3"``.
    """
    api_user, _ = KhojApiUser.objects.get_or_create(
        user=default_user3,
        defaults={"name": "api-key", "token": "kk-diff-secret-3"},
    )
    return api_user
2023-11-26 22:09:00 +01:00
@pytest.fixture
def api_user4(default_user4):
    """Return the API user for ``default_user4``, creating it if absent.

    A newly created API user is named ``"api-key"`` and carries the token
    ``"kk-diff-secret-4"``.
    """
    api_user, _ = KhojApiUser.objects.get_or_create(
        user=default_user4,
        defaults={"name": "api-key", "token": "kk-diff-secret-4"},
    )
    return api_user
2024-03-23 17:39:38 +01:00
@pytest.fixture
def offline_agent():
    """Create an "Accountant" agent backed by a default factory-built chat model.

    Returns:
        The newly created ``Agent`` row.
    """
    chat_model = ChatModelOptionsFactory()
    return Agent.objects.create(
        name="Accountant",
        chat_model=chat_model,
        personality="You are a certified CPA. You are able to tell me how much I've spent based on my notes. Regardless of what I ask, you should always respond with the total amount I've spent. ALWAYS RESPOND WITH A SUMMARY TOTAL OF HOW MUCH MONEY I HAVE SPENT.",
    )
@pytest.fixture
def openai_agent():
    """Create an "Accountant" agent backed by the ``gpt-4o-mini`` OpenAI chat model.

    Returns:
        The newly created ``Agent`` row.
    """
    chat_model = ChatModelOptionsFactory(chat_model="gpt-4o-mini", model_type="openai")
    return Agent.objects.create(
        name="Accountant",
        chat_model=chat_model,
        personality="You are a certified CPA. You are able to tell me how much I've spent based on my notes. Regardless of what I ask, you should always respond with the total amount I've spent.",
    )
2023-02-17 17:04:26 +01:00
@pytest.fixture(scope="session")
def search_models(search_config: SearchConfig):
    """Provide an empty ``SearchModels`` container for the test session.

    ``search_config`` is not read here; requesting it makes pytest run that
    fixture before this one.
    """
    return SearchModels()
2024-04-17 09:52:41 +02:00
@pytest.fixture
def default_process_lock():
    """Return a process lock built by ``ProcessLockFactory`` with default values."""
    return ProcessLockFactory()
2023-10-26 18:42:29 +02:00
@pytest.fixture
def anyio_backend():
    """Name the async backend for anyio-based tests: ``"asyncio"``."""
    return "asyncio"
@pytest.fixture(scope="function")
def content_config(tmp_path_factory, search_models: SearchModels, default_user: KhojUser):
    """Register content sources for ``default_user`` and index the sample org data.

    Steps:
        1. Create a ``LocalOrgConfig`` matching ``tests/data/org/*.org``.
        2. Run ``text_search.setup`` on the in-memory org sample data.
        3. If the ``GITHUB_PAT_TOKEN`` environment variable is set, create a
           ``GithubConfig`` and a ``GithubRepoConfig`` for ``khoj-ai/lantern``.
        4. Create a ``LocalPlaintextConfig`` matching the plaintext test files.

    ``tmp_path_factory`` and ``search_models`` are not read in the body; they
    stay in the signature so the fixture's dependencies are unchanged.

    Returns:
        An empty ``ContentConfig``.
    """
    content_config = ContentConfig()

    LocalOrgConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/org/*.org"],
        index_heading_entries=False,
        user=default_user,
    )

    text_search.setup(OrgToEntries, get_sample_data("org"), regenerate=False, user=default_user)

    # Read the token once and reuse the created config rather than re-querying it.
    github_pat_token = os.getenv("GITHUB_PAT_TOKEN")
    if github_pat_token:
        github_config = GithubConfig.objects.create(
            pat_token=github_pat_token,
            user=default_user,
        )
        GithubRepoConfig.objects.create(
            owner="khoj-ai",
            name="lantern",
            branch="master",
            github_config=github_config,
        )

    LocalPlaintextConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/plaintext/*.txt", "tests/data/plaintext/*.md", "tests/data/plaintext/*.html"],
        user=default_user,
    )

    return content_config
2023-01-09 20:17:36 +01:00
2023-03-01 02:26:06 +01:00
@pytest.fixture(scope="session")
def md_content_config():
    """Create a ``LocalMarkdownConfig`` matching ``tests/data/markdown/*.markdown``.

    NOTE(review): this fixture is session-scoped but writes to the database,
    and it passes no ``user`` (the other config fixtures in this file do).
    Confirm both are intended before relying on it.

    Returns:
        The created ``LocalMarkdownConfig`` row.
    """
    markdown_config = LocalMarkdownConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/markdown/*.markdown"],
    )
    return markdown_config
2023-03-15 21:26:19 +01:00
2023-10-26 20:37:41 +02:00
@pytest.fixture(scope="function")
def chat_client(search_config: SearchConfig, default_user2: KhojUser):
    """Test client for ``default_user2`` with authentication not required."""
    return chat_client_builder(search_config, default_user2, require_auth=False)
2023-03-15 21:26:19 +01:00
2023-08-01 05:24:52 +02:00
2024-01-11 17:51:57 +01:00
@pytest.fixture(scope="function")
def chat_client_with_auth(search_config: SearchConfig, default_user2: KhojUser):
    """Test client for ``default_user2`` with authentication required."""
    return chat_client_builder(search_config, default_user2, require_auth=True)
2023-08-01 05:24:52 +02:00
2023-10-26 20:37:41 +02:00
@pytest.fixture(scope="function")
def chat_client_no_background(search_config: SearchConfig, default_user2: KhojUser):
    """Test client for ``default_user2`` with no content indexed and no auth required."""
    return chat_client_builder(search_config, default_user2, index_content=False, require_auth=False)
def chat_client_builder(search_config, user, index_content=True, require_auth=False):
    """Assemble a FastAPI ``TestClient`` configured for chat tests.

    Args:
        search_config: Stored on ``state.config.search_type``.
        user: User whose content and chat settings are created.
        index_content: When true, register the markdown test files for
            ``user`` and index them through ``configure_content``.
        require_auth: When false, ``state.anonymous_mode`` is switched on.

    Returns:
        A ``TestClient`` wrapping a fresh app with routes, middleware and the
        ``/static`` mount configured.
    """
    # Initialize app state
    state.config.search_type = search_config
    state.SearchType = configure_search_types()

    if index_content:
        LocalMarkdownConfig.objects.create(
            input_files=None,
            input_filter=["tests/data/markdown/*.markdown"],
            user=user,
        )

        # Index Markdown Content for Search
        all_files = fs_syncer.collect_files(user=user)
        configure_content(all_files, user=user)

    # Initialize Processor from Config (only when an OpenAI key is available)
    if os.getenv("OPENAI_API_KEY"):
        chat_model = ChatModelOptionsFactory(chat_model="gpt-4o-mini", model_type="openai")
        chat_model.openai_config = OpenAIProcessorConversationConfigFactory()
        UserConversationProcessorConfigFactory(user=user, setting=chat_model)

    state.anonymous_mode = not require_auth

    app = FastAPI()
    configure_routes(app)
    configure_middleware(app)
    app.mount("/static", StaticFiles(directory=web_directory), name="static")
    return TestClient(app)
@pytest.fixture(scope="function")
def fastapi_app():
    """Return a fresh FastAPI app with routes, middleware and ``/static`` mounted."""
    app = FastAPI()
    configure_routes(app)
    configure_middleware(app)
    app.mount("/static", StaticFiles(directory=web_directory), name="static")
    return app
@pytest.fixture(scope="function")
def client(
    content_config: ContentConfig,
    search_config: SearchConfig,
    api_user: KhojApiUser,
):
    """Return an authenticated-mode ``TestClient`` with sample content indexed.

    Side effects:
        - Stores ``content_config`` and ``search_config`` on ``state.config``.
        - Resets ``state.embeddings_model`` and ``state.cross_encoder_model``
          to dicts holding a single ``"default"`` model each.
        - Indexes the org and plaintext sample data for ``api_user.user``.
        - Turns ``state.anonymous_mode`` off.
    """
    state.config.content_type = content_config
    state.config.search_type = search_config
    state.SearchType = configure_search_types()
    state.embeddings_model = {"default": EmbeddingsModel()}
    state.cross_encoder_model = {"default": CrossEncoderModel()}

    # These lines help us Mock the Search models for these search types
    text_search.setup(
        OrgToEntries,
        get_sample_data("org"),
        regenerate=False,
        user=api_user.user,
    )
    text_search.setup(
        PlaintextToEntries,
        get_sample_data("plaintext"),
        regenerate=False,
        user=api_user.user,
    )

    state.anonymous_mode = False

    app = FastAPI()
    configure_routes(app)
    configure_middleware(app)
    app.mount("/static", StaticFiles(directory=web_directory), name="static")
    return TestClient(app)
@pytest.fixture(scope="function")
def client_offline_chat(search_config: SearchConfig, default_user2: KhojUser):
    """Return an anonymous-mode ``TestClient`` set up for offline chat tests.

    Registers and indexes the markdown test files for ``default_user2``,
    builds an offline chat model option and a conversation config for that
    user, then turns ``state.anonymous_mode`` on.
    """
    # Initialize app state
    state.config.search_type = search_config
    state.SearchType = configure_search_types()

    LocalMarkdownConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/markdown/*.markdown"],
        user=default_user2,
    )

    all_files = fs_syncer.collect_files(user=default_user2)
    configure_content(all_files, user=default_user2)

    # Initialize Processor from Config
    ChatModelOptionsFactory(
        chat_model="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
        tokenizer=None,
        max_prompt_size=None,
        model_type="offline",
    )
    UserConversationProcessorConfigFactory(user=default_user2)

    state.anonymous_mode = True

    app = FastAPI()
    configure_routes(app)
    configure_middleware(app)
    app.mount("/static", StaticFiles(directory=web_directory), name="static")
    return TestClient(app)
2023-02-17 17:04:26 +01:00
@pytest.fixture(scope="function")
def new_org_file(default_user: KhojUser, content_config: ContentConfig):
    """Yield the path of an empty ``new_file.org`` and delete it afterwards.

    The file is placed in the directory of the first ``input_filter`` entry
    of ``default_user``'s org config. ``content_config`` is requested so that
    config exists before this fixture runs.
    """
    # Setup
    org_config = LocalOrgConfig.objects.filter(user=default_user).first()
    input_filters = org_config.input_filter
    new_org_file = Path(input_filters[0]).parent / "new_file.org"
    new_org_file.touch()

    yield new_org_file

    # Cleanup
    if new_org_file.exists():
        new_org_file.unlink()
2023-02-17 17:04:26 +01:00
@pytest.fixture(scope="function")
def org_config_with_only_new_file(new_org_file: Path, default_user: KhojUser):
    """Point ``default_user``'s org config at ``new_org_file`` only.

    Sets ``input_files`` to the new file and clears ``input_filter``. The
    update is limited to ``default_user``'s rows so other users' org configs
    are left alone.

    Returns:
        The first ``LocalOrgConfig`` of ``default_user`` after the update.
    """
    user_org_configs = LocalOrgConfig.objects.filter(user=default_user)
    user_org_configs.update(input_files=[str(new_org_file)], input_filter=None)
    return LocalOrgConfig.objects.filter(user=default_user).first()
2023-08-31 21:55:17 +02:00
2024-06-18 16:01:07 +02:00
@pytest.fixture(scope="function")
def pdf_configured_user1(default_user: KhojUser):
    """Register and index the single-page PDF test file for ``default_user``.

    The fixture is used for its side effects only and returns nothing.
    """
    LocalPdfConfig.objects.create(
        input_files=None,
        input_filter=["tests/data/pdf/singlepage.pdf"],
        user=default_user,
    )
    # Index PDF Content for Search
    all_files = fs_syncer.collect_files(user=default_user)
    configure_content(all_files, user=default_user)
2023-08-31 21:55:17 +02:00
@pytest.fixture(scope="function")
def sample_org_data():
    """Return the in-memory org-mode sample files (file name -> content)."""
    return get_sample_data("org")
def get_sample_data(type):
    """Return the in-memory sample files for one content type.

    Args:
        type: One of ``"org"``, ``"markdown"`` or ``"plaintext"``.

    Returns:
        A dict mapping file name to file content.

    Raises:
        KeyError: If ``type`` is not one of the supported content types.

    NOTE(review): the sample text is written flush-left with no blank lines;
    check it against the upstream fixture if a test depends on exact whitespace.
    """
    sample_data = {
        "org": {
            "elisp.org": """
* Emacs Khoj
/An Emacs interface for [[https://github.com/khoj-ai/khoj][khoj]]/
** Requirements
- Install and Run [[https://github.com/khoj-ai/khoj][khoj]]
** Installation
*** Direct
- Put ~khoj.el~ in your Emacs load path. For e.g. ~/.emacs.d/lisp
- Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet
#+begin_src elisp
;; Khoj Package
(use-package khoj
:load-path "~/.emacs.d/lisp/khoj.el"
:bind ("C-c s" . 'khoj))
#+end_src
*** Using [[https://github.com/quelpa/quelpa#installation][Quelpa]]
- Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed
- Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it.
#+begin_src elisp
;; Khoj Package
(use-package khoj
:quelpa (khoj :fetcher url :url "https://raw.githubusercontent.com/khoj-ai/khoj/master/interface/emacs/khoj.el")
:bind ("C-c s" . 'khoj))
#+end_src
** Usage
1. Call ~khoj~ using keybinding ~C-c s~ or ~M-x khoj~
2. Enter Query in Natural Language
e.g. "What is the meaning of life?" "What are my life goals?"
3. Wait for results
*Note: It takes about 15s on a Mac M1 and a ~100K lines corpus of org-mode files*
4. (Optional) Narrow down results further
Include/Exclude specific words from results by adding to query
e.g. "What is the meaning of life? -god +none"
""",
            "readme.org": """
* Khoj
/Allow natural language search on user content like notes, images using transformer based models/
All data is processed locally. User can interface with khoj app via [[./interface/emacs/khoj.el][Emacs]], API or Commandline
** Dependencies
- Python3
- [[https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links][Miniconda]]
** Install
#+begin_src shell
git clone https://github.com/khoj-ai/khoj && cd khoj
conda env create -f environment.yml
conda activate khoj
#+end_src""",
        },
        "markdown": {
            "readme.markdown": """
# Khoj
Allow natural language search on user content like notes, images using transformer based models
All data is processed locally. User can interface with khoj app via [Emacs](./interface/emacs/khoj.el), API or Commandline
## Dependencies
- Python3
- [Miniconda](https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links)
## Install
```shell
git clone
conda env create -f environment.yml
conda activate khoj
```
"""
        },
        "plaintext": {
            "readme.txt": """
Khoj
Allow natural language search on user content like notes, images using transformer based models
All data is processed locally. User can interface with khoj app via Emacs, API or Commandline
Dependencies
- Python3
- Miniconda
Install
git clone
conda env create -f environment.yml
conda activate khoj
"""
        },
    }

    return sample_data[type]