From 5923b6d89eaa4a0c0d51888343350ab2cbc983b0 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 18 Jul 2024 22:30:52 +0530 Subject: [PATCH] Split /api/v1/index/update into /api/content PUT, PATCH API endpoints - This utilizes PUT, PATCH HTTP method semantics to remove need for the "regenerate" query param and "/update" url suffix - This should make the url more succinct and API request intent more understandable by using existing HTTP method semantics --- src/interface/desktop/main.js | 8 ++- src/interface/emacs/khoj.el | 4 +- src/interface/obsidian/src/utils.ts | 5 +- src/interface/web/app/common/chatFunctions.ts | 4 +- src/khoj/configure.py | 6 +-- src/khoj/interface/web/chat.html | 4 +- .../routers/{indexer.py => api_content.py} | 54 +++++++++++++++---- tests/conftest.py | 2 +- tests/test_client.py | 22 ++++---- tests/test_multiple_users.py | 4 +- 10 files changed, 76 insertions(+), 37 deletions(-) rename src/khoj/routers/{indexer.py => api_content.py} (74%) diff --git a/src/interface/desktop/main.js b/src/interface/desktop/main.js index 76abe63c..618825e1 100644 --- a/src/interface/desktop/main.js +++ b/src/interface/desktop/main.js @@ -233,11 +233,15 @@ function pushDataToKhoj (regenerate = false) { // Request indexing files on server. With upto 1000 files in each request for (let i = 0; i < filesDataToPush.length; i += 1000) { + const syncUrl = `${hostURL}/api/content?client=desktop`; const filesDataGroup = filesDataToPush.slice(i, i + 1000); const formData = new FormData(); filesDataGroup.forEach(fileData => { formData.append('files', fileData.blob, fileData.path) }); - let request = axios.post(`${hostURL}/api/v1/index/update?force=${regenerate}&client=desktop`, formData, { headers }); - requests.push(request); + requests.push( + regenerate + ? 
axios.put(syncUrl, formData, { headers }) + : axios.patch(syncUrl, formData, { headers }) + ); } // Wait for requests batch to finish diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el index c5a07cde..04c821e1 100644 --- a/src/interface/emacs/khoj.el +++ b/src/interface/emacs/khoj.el @@ -424,12 +424,12 @@ Auto invokes setup steps on calling main entrypoint." "Send multi-part form `BODY' of `CONTENT-TYPE' in request to khoj server. Append 'TYPE-QUERY' as query parameter in request url. Specify `BOUNDARY' used to separate files in request header." - (let ((url-request-method "POST") + (let ((url-request-method (if force "PUT" "PATCH")) (url-request-data body) (url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary)) ("Authorization" . ,(format "Bearer %s" khoj-api-key))))) (with-current-buffer - (url-retrieve (format "%s/api/v1/index/update?%s&force=%s&client=emacs" khoj-server-url type-query (or force "false")) + (url-retrieve (format "%s/api/content?%s&client=emacs" khoj-server-url type-query) ;; render response from indexing API endpoint on server (lambda (status) (if (not (plist-get status :error)) diff --git a/src/interface/obsidian/src/utils.ts b/src/interface/obsidian/src/utils.ts index 5c8b3cf9..55e3f63a 100644 --- a/src/interface/obsidian/src/utils.ts +++ b/src/interface/obsidian/src/utils.ts @@ -89,10 +89,11 @@ export async function updateContentIndex(vault: Vault, setting: KhojSetting, las for (let i = 0; i < fileData.length; i += 1000) { const filesGroup = fileData.slice(i, i + 1000); const formData = new FormData(); + const method = regenerate ? 
"PUT" : "PATCH"; filesGroup.forEach(fileItem => { formData.append('files', fileItem.blob, fileItem.path) }); // Call Khoj backend to update index with all markdown, pdf files - const response = await fetch(`${setting.khojUrl}/api/v1/index/update?force=${regenerate}&client=obsidian`, { - method: 'POST', + const response = await fetch(`${setting.khojUrl}/api/content?client=obsidian`, { + method: method, headers: { 'Authorization': `Bearer ${setting.khojApiKey}`, }, diff --git a/src/interface/web/app/common/chatFunctions.ts b/src/interface/web/app/common/chatFunctions.ts index 480f6746..0e299823 100644 --- a/src/interface/web/app/common/chatFunctions.ts +++ b/src/interface/web/app/common/chatFunctions.ts @@ -275,8 +275,8 @@ export function uploadDataForIndexing( // Wait for all files to be read before making the fetch request Promise.all(fileReadPromises) .then(() => { - return fetch("/api/v1/index/update?force=false&client=web", { - method: "POST", + return fetch("/api/content?client=web", { + method: "PATCH", body: formData, }); }) diff --git a/src/khoj/configure.py b/src/khoj/configure.py index 819fc15d..44dcf584 100644 --- a/src/khoj/configure.py +++ b/src/khoj/configure.py @@ -42,7 +42,7 @@ from khoj.database.adapters import ( ) from khoj.database.models import ClientApplication, KhojUser, ProcessLock, Subscription from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel -from khoj.routers.indexer import configure_content, configure_search +from khoj.routers.api_content import configure_content, configure_search from khoj.routers.twilio import is_twilio_enabled from khoj.utils import constants, state from khoj.utils.config import SearchType @@ -309,7 +309,7 @@ def configure_routes(app): from khoj.routers.api_agents import api_agents from khoj.routers.api_chat import api_chat from khoj.routers.api_config import api_config - from khoj.routers.indexer import indexer + from khoj.routers.api_content import api_content from khoj.routers.notion import 
notion_router from khoj.routers.web_client import web_client @@ -317,7 +317,7 @@ def configure_routes(app): app.include_router(api_chat, prefix="/api/chat") app.include_router(api_agents, prefix="/api/agents") app.include_router(api_config, prefix="/api/configure") - app.include_router(indexer, prefix="/api/v1/index") + app.include_router(api_content, prefix="/api/content") app.include_router(notion_router, prefix="/api/notion") app.include_router(web_client) diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index 38007ce2..fbbe6a3a 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -998,8 +998,8 @@ To get started, just start typing below. You can also type / to see a list of co // Wait for all files to be read before making the fetch request Promise.all(fileReadPromises) .then(() => { - return fetch("/api/v1/index/update?force=false&client=web", { - method: "POST", + return fetch("/api/content?client=web", { + method: "PATCH", body: formData, }); }) diff --git a/src/khoj/routers/indexer.py b/src/khoj/routers/api_content.py similarity index 74% rename from src/khoj/routers/indexer.py rename to src/khoj/routers/api_content.py index 5c080cd4..d4f9f6ec 100644 --- a/src/khoj/routers/indexer.py +++ b/src/khoj/routers/api_content.py @@ -19,7 +19,7 @@ from khoj.utils.yaml import save_config_to_file_updated_state logger = logging.getLogger(__name__) -indexer = APIRouter() +api_content = APIRouter() class File(BaseModel): @@ -40,12 +40,11 @@ class IndexerInput(BaseModel): docx: Optional[dict[str, bytes]] = None -@indexer.post("/update") +@api_content.put("") @requires(["authenticated"]) -async def update( +async def put_content( request: Request, files: list[UploadFile], - force: bool = False, t: Optional[Union[state.SearchType, str]] = state.SearchType.All, client: Optional[str] = None, user_agent: Optional[str] = Header(None), @@ -59,8 +58,44 @@ async def update( subscribed_total_entries_size_limit=100, ) 
), +): + return await indexer(request, files, t, True, client, user_agent, referer, host) + + +@api_content.patch("") +@requires(["authenticated"]) +async def patch_content( + request: Request, + files: list[UploadFile], + t: Optional[Union[state.SearchType, str]] = state.SearchType.All, + client: Optional[str] = None, + user_agent: Optional[str] = Header(None), + referer: Optional[str] = Header(None), + host: Optional[str] = Header(None), + indexed_data_limiter: ApiIndexedDataLimiter = Depends( + ApiIndexedDataLimiter( + incoming_entries_size_limit=10, + subscribed_incoming_entries_size_limit=25, + total_entries_size_limit=10, + subscribed_total_entries_size_limit=100, + ) + ), +): + return await indexer(request, files, t, False, client, user_agent, referer, host) + + +async def indexer( + request: Request, + files: list[UploadFile], + t: Optional[Union[state.SearchType, str]] = state.SearchType.All, + regenerate: bool = False, + client: Optional[str] = None, + user_agent: Optional[str] = Header(None), + referer: Optional[str] = Header(None), + host: Optional[str] = Header(None), ): user = request.user.object + method = "regenerate" if regenerate else "sync" index_files: Dict[str, Dict[str, str]] = { "org": {}, "markdown": {}, @@ -116,18 +151,17 @@ async def update( None, configure_content, indexer_input.model_dump(), - force, + regenerate, t, - False, user, ) if not success: - raise RuntimeError("Failed to update content index") - logger.info(f"Finished processing batch indexing request") + raise RuntimeError(f"Failed to {method} {t} data sent by {client} client into content index") + logger.info(f"Finished {method} {t} data sent by {client} client into content index") except Exception as e: - logger.error(f"Failed to process batch indexing request: {e}", exc_info=True) + logger.error(f"Failed to {method} {t} data sent by {client} client into content index: {e}", exc_info=True) logger.error( - f'🚨 Failed to {"force " if force else ""}update {t} content index 
triggered via API call by {client} client: {e}', + f"🚨 Failed to {method} {t} data sent by {client} client into content index: {e}", exc_info=True, ) return Response(content="Failed", status_code=500) diff --git a/tests/conftest.py b/tests/conftest.py index a16413a0..61578ce2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,7 @@ from khoj.database.models import ( from khoj.processor.content.org_mode.org_to_entries import OrgToEntries from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel -from khoj.routers.indexer import configure_content +from khoj.routers.api_content import configure_content from khoj.search_type import text_search from khoj.utils import fs_syncer, state from khoj.utils.config import SearchModels diff --git a/tests/test_client.py b/tests/test_client.py index 0fcdb733..b7c11590 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -75,7 +75,7 @@ def test_index_update_with_no_auth_key(client): files = get_sample_files_data() # Act - response = client.post("/api/v1/index/update", files=files) + response = client.patch("/api/content", files=files) # Assert assert response.status_code == 403 @@ -89,7 +89,7 @@ def test_index_update_with_invalid_auth_key(client): headers = {"Authorization": "Bearer kk-invalid-token"} # Act - response = client.post("/api/v1/index/update", files=files, headers=headers) + response = client.patch("/api/content", files=files, headers=headers) # Assert assert response.status_code == 403 @@ -130,7 +130,7 @@ def test_index_update_big_files(client): headers = {"Authorization": "Bearer kk-secret"} # Act - response = client.post("/api/v1/index/update", files=files, headers=headers) + response = client.patch("/api/content", files=files, headers=headers) # Assert assert response.status_code == 429 @@ -146,7 +146,7 @@ def test_index_update_medium_file_unsubscribed(client, api_user4: KhojApiUser): headers 
= {"Authorization": f"Bearer {api_token}"} # Act - response = client.post("/api/v1/index/update", files=files, headers=headers) + response = client.patch("/api/content", files=files, headers=headers) # Assert assert response.status_code == 429 @@ -162,7 +162,7 @@ def test_index_update_normal_file_unsubscribed(client, api_user4: KhojApiUser): headers = {"Authorization": f"Bearer {api_token}"} # Act - response = client.post("/api/v1/index/update", files=files, headers=headers) + response = client.patch("/api/content", files=files, headers=headers) # Assert assert response.status_code == 200 @@ -177,7 +177,7 @@ def test_index_update_big_files_no_billing(client): headers = {"Authorization": "Bearer kk-secret"} # Act - response = client.post("/api/v1/index/update", files=files, headers=headers) + response = client.patch("/api/content", files=files, headers=headers) # Assert assert response.status_code == 200 @@ -191,7 +191,7 @@ def test_index_update(client): headers = {"Authorization": "Bearer kk-secret"} # Act - response = client.post("/api/v1/index/update", files=files, headers=headers) + response = client.patch("/api/content", files=files, headers=headers) # Assert assert response.status_code == 200 @@ -208,8 +208,8 @@ def test_index_update_fails_if_more_than_1000_files(client, api_user4: KhojApiUs headers = {"Authorization": f"Bearer {api_token}"} # Act - ok_response = client.post("/api/v1/index/update", files=files[:1000], headers=headers) - bad_response = client.post("/api/v1/index/update", files=files, headers=headers) + ok_response = client.patch("/api/content", files=files[:1000], headers=headers) + bad_response = client.patch("/api/content", files=files, headers=headers) # Assert assert ok_response.status_code == 200 @@ -226,7 +226,7 @@ def test_regenerate_with_valid_content_type(client): headers = {"Authorization": "Bearer kk-secret"} # Act - response = client.post(f"/api/v1/index/update?t={content_type}", files=files, headers=headers) + response = 
client.patch(f"/api/content?t={content_type}", files=files, headers=headers) # Assert assert response.status_code == 200, f"Returned status: {response.status_code} for content type: {content_type}" @@ -243,7 +243,7 @@ def test_regenerate_with_github_fails_without_pat(client): files = get_sample_files_data() # Act - response = client.post(f"/api/v1/index/update?t=github", files=files, headers=headers) + response = client.patch(f"/api/content?t=github", files=files, headers=headers) # Assert assert response.status_code == 200, f"Returned status: {response.status_code} for content type: github" diff --git a/tests/test_multiple_users.py b/tests/test_multiple_users.py index 4e8e456a..d8f8725e 100644 --- a/tests/test_multiple_users.py +++ b/tests/test_multiple_users.py @@ -29,7 +29,7 @@ def test_index_update_with_user2(client, api_user2: KhojApiUser): source_file_symbol = set([f[1][0] for f in files]) headers = {"Authorization": f"Bearer {api_user2.token}"} - update_response = client.post("/api/v1/index/update", files=files, headers=headers) + update_response = client.patch("/api/content", files=files, headers=headers) search_response = client.get("/api/search?q=hardware&t=all", headers=headers) results = search_response.json() @@ -47,7 +47,7 @@ def test_index_update_with_user2_inaccessible_user1(client, api_user2: KhojApiUs source_file_symbol = set([f[1][0] for f in files]) headers = {"Authorization": f"Bearer {api_user2.token}"} - update_response = client.post("/api/v1/index/update", files=files, headers=headers) + update_response = client.patch("/api/content", files=files, headers=headers) # Act headers = {"Authorization": f"Bearer {api_user.token}"}