diff --git a/Readme.md b/Readme.md index da09b2ce..14794746 100644 --- a/Readme.md +++ b/Readme.md @@ -85,7 +85,7 @@ khoj ### 3. Configure 1. Enable content types and point to files to search in the First Run Screen that pops up on app start -2. Click configure and wait. The app will load ML model, generates embeddings and expose the search API +2. Click `Configure` and wait. The app will download ML models and index the content for search ## Use @@ -113,7 +113,7 @@ pip install --upgrade khoj-assistant ## Miscellaneous -- The beta [chat](http://localhost:8000/beta/chat) and [search](http://localhost:8000/beta/search) API endpoints use [OpenAI API](https://openai.com/api/) +- The beta [chat](http://localhost:8000/api/beta/chat) and [search](http://localhost:8000/api/beta/search) API endpoints use [OpenAI API](https://openai.com/api/) - It is disabled by default - To use it add your `openai-api-key` via the app configure screen - Warning: *If you use the above beta APIs, your query and top result(s) will be sent to OpenAI for processing* diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el index c34fb88e..c05f98c6 100644 --- a/src/interface/emacs/khoj.el +++ b/src/interface/emacs/khoj.el @@ -226,7 +226,7 @@ Use `which-key` if available, else display simple message in echo area" (defun khoj--get-enabled-content-types () "Get content types enabled for search from API." - (let ((config-url (format "%s/config/data" khoj-server-url))) + (let ((config-url (format "%s/api/v1.0/config/data" khoj-server-url))) (with-temp-buffer (erase-buffer) (url-insert-file-contents config-url) @@ -243,7 +243,7 @@ Use `which-key` if available, else display simple message in echo area" "Construct API Query from QUERY, SEARCH-TYPE and (optional) RERANK params." (let ((rerank (or rerank "false")) (encoded-query (url-hexify-string query))) - (format "%s/search?q=%s&t=%s&r=%s&n=%s" khoj-server-url encoded-query search-type rerank khoj-results-count))) + (format "%s/api/v1.0/search?q=%s&t=%s&r=%s&n=%s" khoj-server-url encoded-query search-type rerank khoj-results-count))) (defun khoj--query-api-and-render-results (query search-type query-url buffer-name) "Query Khoj API using QUERY, SEARCH-TYPE, QUERY-URL. diff --git a/src/interface/web/assets/config.js b/src/interface/web/assets/config.js index 30ab6858..90412e1c 100644 --- a/src/interface/web/assets/config.js +++ b/src/interface/web/assets/config.js @@ -10,7 +10,7 @@ var emptyValueDefault = "🖊️"; /** * Fetch the existing config file. */ -fetch("/config/data") +fetch("/api/v1.0/config/data") .then(response => response.json()) .then(data => { rawConfig = data; @@ -26,7 +26,7 @@ fetch("/config/data") configForm.addEventListener("submit", (event) => { event.preventDefault(); console.log(rawConfig); - fetch("/config/data", { + fetch("/api/v1.0/config/data", { method: "POST", credentials: "same-origin", headers: { @@ -46,7 +46,7 @@ regenerateButton.addEventListener("click", (event) => { event.preventDefault(); regenerateButton.style.cursor = "progress"; regenerateButton.disabled = true; - fetch("/regenerate") + fetch("/api/v1.0/update?force=true") .then(response => response.json()) .then(data => { regenerateButton.style.cursor = "pointer"; diff --git a/src/interface/web/index.html b/src/interface/web/index.html index a74041fa..5dc76b87 100644 --- a/src/interface/web/index.html +++ b/src/interface/web/index.html @@ -77,8 +77,8 @@ // Generate Backend API URL to execute Search url = type === "image" - ? `/search?q=${encodeURIComponent(query)}&t=${type}&n=${results_count}` - : `/search?q=${encodeURIComponent(query)}&t=${type}&n=${results_count}&r=${rerank}`; + ? `/api/v1.0/search?q=${encodeURIComponent(query)}&t=${type}&n=${results_count}` + : `/api/v1.0/search?q=${encodeURIComponent(query)}&t=${type}&n=${results_count}&r=${rerank}`; // Execute Search and Render Results fetch(url) @@ -94,7 +94,7 @@ function updateIndex() { type = document.getElementById("type").value; - fetch(`/reload?t=${type}`) + fetch(`/api/v1.0/update?t=${type}`) .then(response => response.json()) .then(data => { console.log(data); @@ -118,7 +118,7 @@ function populate_type_dropdown() { // Populate type dropdown field with enabled search types only var possible_search_types = ["org", "markdown", "ledger", "music", "image"]; - fetch("/config/data") + fetch("/api/v1.0/config/data") .then(response => response.json()) .then(data => { document.getElementById("type").innerHTML = diff --git a/src/main.py b/src/main.py index 378758b2..13d15674 100644 --- a/src/main.py +++ b/src/main.py @@ -19,7 +19,9 @@ from PyQt6.QtCore import QThread, QTimer # Internal Packages from src.configure import configure_server -from src.router import router +from src.routers.api_v1_0 import api_v1_0 +from src.routers.api_beta import api_beta +from src.routers.frontend import frontend_router from src.utils import constants, state from src.utils.cli import cli from src.interface.desktop.main_window import MainWindow @@ -29,7 +31,9 @@ from src.interface.desktop.system_tray import create_system_tray # Initialize the Application Server app = FastAPI() app.mount("/static", StaticFiles(directory=constants.web_directory), name="static") -app.include_router(router) +app.include_router(api_v1_0, prefix="/api/v1.0") +app.include_router(api_beta, prefix="/api/beta") +app.include_router(frontend_router) logger = logging.getLogger('src') diff --git a/src/routers/api_beta.py b/src/routers/api_beta.py new file mode 100644 index 00000000..389025b9 --- /dev/null +++ b/src/routers/api_beta.py @@ -0,0 +1,89 @@ +# Standard Packages +import json +import logging +from typing import Optional + +# External Packages +from fastapi import APIRouter + +# Internal Packages +from src.routers.api_v1_0 import search +from src.processor.conversation.gpt import converse, extract_search_type, message_to_log, message_to_prompt, understand, summarize +from src.utils.config import SearchType +from src.utils.helpers import get_absolute_path, get_from_dict +from src.utils import state + + +api_beta = APIRouter() +logger = logging.getLogger(__name__) + + +@api_beta.get('/search') +def search_beta(q: str, n: Optional[int] = 1): + # Extract Search Type using GPT + metadata = extract_search_type(q, api_key=state.processor_config.conversation.openai_api_key, verbose=state.verbose) + search_type = get_from_dict(metadata, "search-type") + + # Search + search_results = search(q, n=n, t=SearchType(search_type)) + + # Return response + return {'status': 'ok', 'result': search_results, 'type': search_type} + + +@api_beta.get('/chat') +def chat(q: str): + # Load Conversation History + chat_session = state.processor_config.conversation.chat_session + meta_log = state.processor_config.conversation.meta_log + + # Converse with OpenAI GPT + metadata = understand(q, api_key=state.processor_config.conversation.openai_api_key, verbose=state.verbose) + if state.verbose > 1: + print(f'Understood: {get_from_dict(metadata, "intent")}') + + if get_from_dict(metadata, "intent", "memory-type") == "notes": + query = get_from_dict(metadata, "intent", "query") + result_list = search(query, n=1, t=SearchType.Org) + collated_result = "\n".join([item["entry"] for item in result_list]) + if state.verbose > 1: + print(f'Semantically Similar Notes:\n{collated_result}') + gpt_response = summarize(collated_result, summary_type="notes", user_query=q, api_key=state.processor_config.conversation.openai_api_key) + else: + gpt_response = converse(q, chat_session, api_key=state.processor_config.conversation.openai_api_key) + + # Update Conversation History + state.processor_config.conversation.chat_session = message_to_prompt(q, chat_session, gpt_message=gpt_response) + state.processor_config.conversation.meta_log['chat'] = message_to_log(q, metadata, gpt_response, meta_log.get('chat', [])) + + return {'status': 'ok', 'response': gpt_response} + + +@api_beta.on_event('shutdown') +def shutdown_event(): + # No need to create empty log file + if not (state.processor_config and state.processor_config.conversation and state.processor_config.conversation.meta_log): + return + elif state.processor_config.conversation.verbose: + print('INFO:\tSaving conversation logs to disk...') + + # Summarize Conversation Logs for this Session + chat_session = state.processor_config.conversation.chat_session + openai_api_key = state.processor_config.conversation.openai_api_key + conversation_log = state.processor_config.conversation.meta_log + session = { + "summary": summarize(chat_session, summary_type="chat", api_key=openai_api_key), + "session-start": conversation_log.get("session", [{"session-end": 0}])[-1]["session-end"], + "session-end": len(conversation_log["chat"]) + } + if 'session' in conversation_log: + conversation_log['session'].append(session) + else: + conversation_log['session'] = [session] + + # Save Conversation Metadata Logs to Disk + conversation_logfile = get_absolute_path(state.processor_config.conversation.conversation_logfile) + with open(conversation_logfile, "w+", encoding='utf-8') as logfile: + json.dump(conversation_log, logfile) + + print('INFO:\tConversation logs saved to disk.') diff --git a/src/router.py b/src/routers/api_v1_0.py similarity index 51% rename from src/router.py rename to src/routers/api_v1_0.py index 27b237dd..616dbc17 100644 --- a/src/router.py +++ b/src/routers/api_v1_0.py @@ -1,45 +1,29 @@ # Standard Packages import yaml -import json import time import logging from typing import Optional # External Packages from fastapi import APIRouter -from fastapi import Request -from fastapi.responses import HTMLResponse, FileResponse -from fastapi.templating import Jinja2Templates # Internal Packages from src.configure import configure_search from src.search_type import image_search, text_search -from src.processor.conversation.gpt import converse, extract_search_type, message_to_log, message_to_prompt, understand, summarize from src.utils.rawconfig import FullConfig from src.utils.config import SearchType -from src.utils.helpers import LRU, get_absolute_path, get_from_dict from src.utils import state, constants -router = APIRouter() -templates = Jinja2Templates(directory=constants.web_directory) +api_v1_0 = APIRouter() logger = logging.getLogger(__name__) -query_cache = LRU() -@router.get("/", response_class=FileResponse) -def index(): - return FileResponse(constants.web_directory / "index.html") - -@router.get('/config', response_class=HTMLResponse) -def config_page(request: Request): - return templates.TemplateResponse("config.html", context={'request': request}) - -@router.get('/config/data', response_model=FullConfig) +@api_v1_0.get('/config/data', response_model=FullConfig) def config_data(): return state.config -@router.post('/config/data') +@api_v1_0.post('/config/data') async def config_data(updated_config: FullConfig): state.config = updated_config with open(state.config_file, 'w') as outfile: @@ -47,7 +31,7 @@ async def config_data(updated_config: FullConfig): outfile.close() return state.config -@router.get('/search') +@api_v1_0.get('/search') def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Optional[bool] = False): if q is None or q == '': logger.info(f'No query param (q) passed in API call to initiate search') @@ -137,78 +121,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Opti return results -@router.get('/update') +@api_v1_0.get('/update') def update(t: Optional[SearchType] = None, force: Optional[bool] = False): state.model = configure_search(state.model, state.config, regenerate=force, t=t) - return {'status': 'ok', 'message': 'index updated completed'} - - -@router.get('/beta/search') -def search_beta(q: str, n: Optional[int] = 1): - # Extract Search Type using GPT - metadata = extract_search_type(q, api_key=state.processor_config.conversation.openai_api_key, verbose=state.verbose) - search_type = get_from_dict(metadata, "search-type") - - # Search - search_results = search(q, n=n, t=SearchType(search_type)) - - # Return response - return {'status': 'ok', 'result': search_results, 'type': search_type} - - -@router.get('/beta/chat') -def chat(q: str): - # Load Conversation History - chat_session = state.processor_config.conversation.chat_session - meta_log = state.processor_config.conversation.meta_log - - # Converse with OpenAI GPT - metadata = understand(q, api_key=state.processor_config.conversation.openai_api_key, verbose=state.verbose) - if state.verbose > 1: - print(f'Understood: {get_from_dict(metadata, "intent")}') - - if get_from_dict(metadata, "intent", "memory-type") == "notes": - query = get_from_dict(metadata, "intent", "query") - result_list = search(query, n=1, t=SearchType.Org) - collated_result = "\n".join([item["entry"] for item in result_list]) - if state.verbose > 1: - print(f'Semantically Similar Notes:\n{collated_result}') - gpt_response = summarize(collated_result, summary_type="notes", user_query=q, api_key=state.processor_config.conversation.openai_api_key) - else: - gpt_response = converse(q, chat_session, api_key=state.processor_config.conversation.openai_api_key) - - # Update Conversation History - state.processor_config.conversation.chat_session = message_to_prompt(q, chat_session, gpt_message=gpt_response) - state.processor_config.conversation.meta_log['chat'] = message_to_log(q, metadata, gpt_response, meta_log.get('chat', [])) - - return {'status': 'ok', 'response': gpt_response} - - -@router.on_event('shutdown') -def shutdown_event(): - # No need to create empty log file - if not (state.processor_config and state.processor_config.conversation and state.processor_config.conversation.meta_log): - return - elif state.processor_config.conversation.verbose: - print('INFO:\tSaving conversation logs to disk...') - - # Summarize Conversation Logs for this Session - chat_session = state.processor_config.conversation.chat_session - openai_api_key = state.processor_config.conversation.openai_api_key - conversation_log = state.processor_config.conversation.meta_log - session = { - "summary": summarize(chat_session, summary_type="chat", api_key=openai_api_key), - "session-start": conversation_log.get("session", [{"session-end": 0}])[-1]["session-end"], - "session-end": len(conversation_log["chat"]) - } - if 'session' in conversation_log: - conversation_log['session'].append(session) - else: - conversation_log['session'] = [session] - - # Save Conversation Metadata Logs to Disk - conversation_logfile = get_absolute_path(state.processor_config.conversation.conversation_logfile) - with open(conversation_logfile, "w+", encoding='utf-8') as logfile: - json.dump(conversation_log, logfile) - - print('INFO:\tConversation logs saved to disk.') + return {'status': 'ok', 'message': 'index updated'} diff --git a/src/routers/frontend.py b/src/routers/frontend.py new file mode 100644 index 00000000..8ed5d6ee --- /dev/null +++ b/src/routers/frontend.py @@ -0,0 +1,25 @@ +# Standard Packages +import logging + +# External Packages +from fastapi import APIRouter +from fastapi import Request +from fastapi.responses import HTMLResponse, FileResponse +from fastapi.templating import Jinja2Templates + +# Internal Packages +from src.utils import constants + + +frontend_router = APIRouter() +templates = Jinja2Templates(directory=constants.web_directory) +logger = logging.getLogger(__name__) + + +@frontend_router.get("/", response_class=FileResponse) +def index(): + return FileResponse(constants.web_directory / "index.html") + +@frontend_router.get('/config', response_class=HTMLResponse) +def config_page(request: Request): + return templates.TemplateResponse("config.html", context={'request': request}) diff --git a/tests/data/markdown/main_readme.md b/tests/data/markdown/main_readme.md index 45e289b1..7f626319 100644 --- a/tests/data/markdown/main_readme.md +++ b/tests/data/markdown/main_readme.md @@ -43,9 +43,8 @@ just generating embeddings* - **Khoj via API** - See [Khoj API Docs](http://localhost:8000/docs) - - [Query](http://localhost:8000/search?q=%22what%20is%20the%20meaning%20of%20life%22) - - [Regenerate - Embeddings](http://localhost:8000/regenerate?t=ledger) + - [Query](http://localhost:8000/api/v1.0/search?q=%22what%20is%20the%20meaning%20of%20life%22) + - [Update Index](http://localhost:8000/api/v1.0/update?t=ledger) - [Configure Application](https://localhost:8000/ui) - **Khoj via Emacs** - [Install](https://github.com/debanjum/khoj/tree/master/src/interface/emacs#installation) diff --git a/tests/data/org/main_readme.org b/tests/data/org/main_readme.org index 917562e2..4f63801a 100644 --- a/tests/data/org/main_readme.org +++ b/tests/data/org/main_readme.org @@ -27,8 +27,8 @@ - Run ~M-x khoj ~ or Call ~C-c C-s~ - *Khoj via API* - - Query: ~GET~ [[http://localhost:8000/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:8000/search?q="What is the meaning of life"]] - - Regenerate Embeddings: ~GET~ [[http://localhost:8000/regenerate][http://localhost:8000/regenerate]] + - Query: ~GET~ [[http://localhost:8000/api/v1.0/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:8000/api/v1.0/search?q="What is the meaning of life"]] + - Update Index: ~GET~ [[http://localhost:8000/api/v1.0/update][http://localhost:8000/api/v1.0/update]] - [[http://localhost:8000/docs][Khoj API Docs]] - *Call Khoj via Python Script Directly* diff --git a/tests/test_client.py b/tests/test_client.py index 96fa2c01..c17e7edd 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -28,7 +28,7 @@ def test_search_with_invalid_content_type(): user_query = quote("How to call Khoj from Emacs?") # Act - response = client.get(f"/search?q={user_query}&t=invalid_content_type") + response = client.get(f"/api/v1.0/search?q={user_query}&t=invalid_content_type") # Assert assert response.status_code == 422 @@ -43,29 +43,29 @@ def test_search_with_valid_content_type(content_config: ContentConfig, search_co # config.content_type.image = search_config.image for content_type in ["org", "markdown", "ledger", "music"]: # Act - response = client.get(f"/search?q=random&t={content_type}") + response = client.get(f"/api/v1.0/search?q=random&t={content_type}") # Assert assert response.status_code == 200 # ---------------------------------------------------------------------------------------------------- -def test_reload_with_invalid_content_type(): +def test_update_with_invalid_content_type(): # Act - response = client.get(f"/reload?t=invalid_content_type") + response = client.get(f"/api/v1.0/update?t=invalid_content_type") # Assert assert response.status_code == 422 # ---------------------------------------------------------------------------------------------------- -def test_reload_with_valid_content_type(content_config: ContentConfig, search_config: SearchConfig): +def test_update_with_valid_content_type(content_config: ContentConfig, search_config: SearchConfig): # Arrange config.content_type = content_config config.search_type = search_config for content_type in ["org", "markdown", "ledger", "music"]: # Act - response = client.get(f"/reload?t={content_type}") + response = client.get(f"/api/v1.0/update?t={content_type}") # Assert assert response.status_code == 200 @@ -73,7 +73,7 @@ def test_reload_with_valid_content_type(content_config: ContentConfig, search_co # ---------------------------------------------------------------------------------------------------- def test_regenerate_with_invalid_content_type(): # Act - response = client.get(f"/regenerate?t=invalid_content_type") + response = client.get(f"/api/v1.0/update?force=true&t=invalid_content_type") # Assert assert response.status_code == 422 @@ -87,7 +87,7 @@ def test_regenerate_with_valid_content_type(content_config: ContentConfig, searc for content_type in ["org", "markdown", "ledger", "music", "image"]: # Act - response = client.get(f"/regenerate?t={content_type}") + response = client.get(f"/api/v1.0/update?force=true&t={content_type}") # Assert assert response.status_code == 200 @@ -104,7 +104,7 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig for query, expected_image_name in query_expected_image_pairs: # Act - response = client.get(f"/search?q={query}&n=1&t=image") + response = client.get(f"/api/v1.0/search?q={query}&n=1&t=image") # Assert assert response.status_code == 200 @@ -122,7 +122,7 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig user_query = quote("How to git install application?") # Act - response = client.get(f"/search?q={user_query}&n=1&t=org&r=true") + response = client.get(f"/api/v1.0/search?q={user_query}&n=1&t=org&r=true") # Assert assert response.status_code == 200 @@ -139,7 +139,7 @@ def test_notes_search_with_only_filters(content_config: ContentConfig, search_co user_query = quote('+"Emacs" file:"*.org"') # Act - response = client.get(f"/search?q={user_query}&n=1&t=org") + response = client.get(f"/api/v1.0/search?q={user_query}&n=1&t=org") # Assert assert response.status_code == 200 @@ -156,7 +156,7 @@ def test_notes_search_with_include_filter(content_config: ContentConfig, search_ user_query = quote('How to git install application? +"Emacs"') # Act - response = client.get(f"/search?q={user_query}&n=1&t=org") + response = client.get(f"/api/v1.0/search?q={user_query}&n=1&t=org") # Assert assert response.status_code == 200 @@ -173,7 +173,7 @@ def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_ user_query = quote('How to git install application? -"clone"') # Act - response = client.get(f"/search?q={user_query}&n=1&t=org") + response = client.get(f"/api/v1.0/search?q={user_query}&n=1&t=org") # Assert assert response.status_code == 200