Split /api/v1/index/update into /api/content PUT, PATCH API endpoints

- This utilizes PUT, PATCH HTTP method semantics to remove the need for
  the "force" (regenerate) query param and the "/update" URL suffix
- This should make the URL more succinct and the intent of each API request
  easier to understand by relying on existing HTTP method semantics
This commit is contained in:
Debanjum Singh Solanky 2024-07-18 22:30:52 +05:30
parent 65dade4838
commit 5923b6d89e
10 changed files with 76 additions and 37 deletions

View file

@ -233,11 +233,15 @@ function pushDataToKhoj (regenerate = false) {
// Request indexing files on server. With upto 1000 files in each request
for (let i = 0; i < filesDataToPush.length; i += 1000) {
const syncUrl = `${hostURL}/api/content?client=desktop`;
const filesDataGroup = filesDataToPush.slice(i, i + 1000);
const formData = new FormData();
filesDataGroup.forEach(fileData => { formData.append('files', fileData.blob, fileData.path) });
let request = axios.post(`${hostURL}/api/v1/index/update?force=${regenerate}&client=desktop`, formData, { headers });
requests.push(request);
requests.push(
regenerate
? axios.put(syncUrl, formData, { headers })
: axios.patch(syncUrl, formData, { headers })
);
}
// Wait for requests batch to finish

View file

@ -424,12 +424,12 @@ Auto invokes setup steps on calling main entrypoint."
"Send multi-part form `BODY' of `CONTENT-TYPE' in request to khoj server.
Append 'TYPE-QUERY' as query parameter in request url.
Specify `BOUNDARY' used to separate files in request header."
(let ((url-request-method "POST")
;; Pick the HTTP method: PUT when FORCE is non-nil, PATCH otherwise.
(let ((url-request-method (if force "PUT" "PATCH"))
(url-request-data body)
(url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary))
("Authorization" . ,(format "Bearer %s" khoj-api-key)))))
(with-current-buffer
(url-retrieve (format "%s/api/v1/index/update?%s&force=%s&client=emacs" khoj-server-url type-query (or force "false"))
(url-retrieve (format "%s/api/content?%s&client=emacs" khoj-server-url type-query)
;; render response from indexing API endpoint on server
(lambda (status)
(if (not (plist-get status :error))

View file

@ -89,10 +89,11 @@ export async function updateContentIndex(vault: Vault, setting: KhojSetting, las
for (let i = 0; i < fileData.length; i += 1000) {
const filesGroup = fileData.slice(i, i + 1000);
const formData = new FormData();
const method = regenerate ? "PUT" : "PATCH";
filesGroup.forEach(fileItem => { formData.append('files', fileItem.blob, fileItem.path) });
// Call Khoj backend to update index with all markdown, pdf files
const response = await fetch(`${setting.khojUrl}/api/v1/index/update?force=${regenerate}&client=obsidian`, {
method: 'POST',
const response = await fetch(`${setting.khojUrl}/api/content?client=obsidian`, {
method: method,
headers: {
'Authorization': `Bearer ${setting.khojApiKey}`,
},

View file

@ -275,8 +275,8 @@ export function uploadDataForIndexing(
// Wait for all files to be read before making the fetch request
Promise.all(fileReadPromises)
.then(() => {
return fetch("/api/v1/index/update?force=false&client=web", {
method: "POST",
return fetch("/api/content?client=web", {
method: "PATCH",
body: formData,
});
})

View file

@ -42,7 +42,7 @@ from khoj.database.adapters import (
)
from khoj.database.models import ClientApplication, KhojUser, ProcessLock, Subscription
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
from khoj.routers.indexer import configure_content, configure_search
from khoj.routers.api_content import configure_content, configure_search
from khoj.routers.twilio import is_twilio_enabled
from khoj.utils import constants, state
from khoj.utils.config import SearchType
@ -309,7 +309,7 @@ def configure_routes(app):
from khoj.routers.api_agents import api_agents
from khoj.routers.api_chat import api_chat
from khoj.routers.api_config import api_config
from khoj.routers.indexer import indexer
from khoj.routers.api_content import api_content
from khoj.routers.notion import notion_router
from khoj.routers.web_client import web_client
@ -317,7 +317,7 @@ def configure_routes(app):
app.include_router(api_chat, prefix="/api/chat")
app.include_router(api_agents, prefix="/api/agents")
app.include_router(api_config, prefix="/api/configure")
app.include_router(indexer, prefix="/api/v1/index")
app.include_router(api_content, prefix="/api/content")
app.include_router(notion_router, prefix="/api/notion")
app.include_router(web_client)

View file

@ -998,8 +998,8 @@ To get started, just start typing below. You can also type / to see a list of co
// Wait for all files to be read before making the fetch request
Promise.all(fileReadPromises)
.then(() => {
return fetch("/api/v1/index/update?force=false&client=web", {
method: "POST",
return fetch("/api/content?client=web", {
method: "PATCH",
body: formData,
});
})

View file

@ -19,7 +19,7 @@ from khoj.utils.yaml import save_config_to_file_updated_state
logger = logging.getLogger(__name__)
indexer = APIRouter()
api_content = APIRouter()
class File(BaseModel):
@ -40,12 +40,11 @@ class IndexerInput(BaseModel):
docx: Optional[dict[str, bytes]] = None
@indexer.post("/update")
@api_content.put("")
@requires(["authenticated"])
async def update(
async def put_content(
request: Request,
files: list[UploadFile],
force: bool = False,
t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
client: Optional[str] = None,
user_agent: Optional[str] = Header(None),
@ -59,8 +58,44 @@ async def update(
subscribed_total_entries_size_limit=100,
)
),
):
return await indexer(request, files, t, True, client, user_agent, referer, host)
# PATCH handler on the `api_content` router (mounted under "/api/content").
# Restricted to authenticated requests by the `requires` decorator.
# Forwards the uploaded files and request metadata to `indexer` with
# regenerate=False, which `indexer` labels the "sync" method (as opposed to
# "regenerate", used by the PUT handler).
@api_content.patch("")
@requires(["authenticated"])
async def patch_content(
request: Request,
files: list[UploadFile],
t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
client: Optional[str] = None,
user_agent: Optional[str] = Header(None),
referer: Optional[str] = Header(None),
host: Optional[str] = Header(None),
# NOTE(review): injected only for its side effect as a dependency; it is not
# referenced in the body. Presumably it enforces the size limits configured
# below (units not visible here) - confirm against ApiIndexedDataLimiter.
indexed_data_limiter: ApiIndexedDataLimiter = Depends(
ApiIndexedDataLimiter(
incoming_entries_size_limit=10,
subscribed_incoming_entries_size_limit=25,
total_entries_size_limit=10,
subscribed_total_entries_size_limit=100,
)
),
):
# The fourth positional argument is `regenerate`; False requests a sync
return await indexer(request, files, t, False, client, user_agent, referer, host)
async def indexer(
request: Request,
files: list[UploadFile],
t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
regenerate: bool = False,
client: Optional[str] = None,
user_agent: Optional[str] = Header(None),
referer: Optional[str] = Header(None),
host: Optional[str] = Header(None),
):
user = request.user.object
method = "regenerate" if regenerate else "sync"
index_files: Dict[str, Dict[str, str]] = {
"org": {},
"markdown": {},
@ -116,18 +151,17 @@ async def update(
None,
configure_content,
indexer_input.model_dump(),
force,
regenerate,
t,
False,
user,
)
if not success:
raise RuntimeError("Failed to update content index")
logger.info(f"Finished processing batch indexing request")
raise RuntimeError(f"Failed to {method} {t} data sent by {client} client into content index")
logger.info(f"Finished {method} {t} data sent by {client} client into content index")
except Exception as e:
logger.error(f"Failed to process batch indexing request: {e}", exc_info=True)
logger.error(f"Failed to {method} {t} data sent by {client} client into content index: {e}", exc_info=True)
logger.error(
f'🚨 Failed to {"force " if force else ""}update {t} content index triggered via API call by {client} client: {e}',
f"🚨 Failed to {method} {t} data sent by {client} client into content index: {e}",
exc_info=True,
)
return Response(content="Failed", status_code=500)

View file

@ -25,7 +25,7 @@ from khoj.database.models import (
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
from khoj.routers.indexer import configure_content
from khoj.routers.api_content import configure_content
from khoj.search_type import text_search
from khoj.utils import fs_syncer, state
from khoj.utils.config import SearchModels

View file

@ -75,7 +75,7 @@ def test_index_update_with_no_auth_key(client):
files = get_sample_files_data()
# Act
response = client.post("/api/v1/index/update", files=files)
response = client.patch("/api/content", files=files)
# Assert
assert response.status_code == 403
@ -89,7 +89,7 @@ def test_index_update_with_invalid_auth_key(client):
headers = {"Authorization": "Bearer kk-invalid-token"}
# Act
response = client.post("/api/v1/index/update", files=files, headers=headers)
response = client.patch("/api/content", files=files, headers=headers)
# Assert
assert response.status_code == 403
@ -130,7 +130,7 @@ def test_index_update_big_files(client):
headers = {"Authorization": "Bearer kk-secret"}
# Act
response = client.post("/api/v1/index/update", files=files, headers=headers)
response = client.patch("/api/content", files=files, headers=headers)
# Assert
assert response.status_code == 429
@ -146,7 +146,7 @@ def test_index_update_medium_file_unsubscribed(client, api_user4: KhojApiUser):
headers = {"Authorization": f"Bearer {api_token}"}
# Act
response = client.post("/api/v1/index/update", files=files, headers=headers)
response = client.patch("/api/content", files=files, headers=headers)
# Assert
assert response.status_code == 429
@ -162,7 +162,7 @@ def test_index_update_normal_file_unsubscribed(client, api_user4: KhojApiUser):
headers = {"Authorization": f"Bearer {api_token}"}
# Act
response = client.post("/api/v1/index/update", files=files, headers=headers)
response = client.patch("/api/content", files=files, headers=headers)
# Assert
assert response.status_code == 200
@ -177,7 +177,7 @@ def test_index_update_big_files_no_billing(client):
headers = {"Authorization": "Bearer kk-secret"}
# Act
response = client.post("/api/v1/index/update", files=files, headers=headers)
response = client.patch("/api/content", files=files, headers=headers)
# Assert
assert response.status_code == 200
@ -191,7 +191,7 @@ def test_index_update(client):
headers = {"Authorization": "Bearer kk-secret"}
# Act
response = client.post("/api/v1/index/update", files=files, headers=headers)
response = client.patch("/api/content", files=files, headers=headers)
# Assert
assert response.status_code == 200
@ -208,8 +208,8 @@ def test_index_update_fails_if_more_than_1000_files(client, api_user4: KhojApiUs
headers = {"Authorization": f"Bearer {api_token}"}
# Act
ok_response = client.post("/api/v1/index/update", files=files[:1000], headers=headers)
bad_response = client.post("/api/v1/index/update", files=files, headers=headers)
ok_response = client.patch("/api/content", files=files[:1000], headers=headers)
bad_response = client.patch("/api/content", files=files, headers=headers)
# Assert
assert ok_response.status_code == 200
@ -226,7 +226,7 @@ def test_regenerate_with_valid_content_type(client):
headers = {"Authorization": "Bearer kk-secret"}
# Act
response = client.post(f"/api/v1/index/update?t={content_type}", files=files, headers=headers)
response = client.patch(f"/api/content?t={content_type}", files=files, headers=headers)
# Assert
assert response.status_code == 200, f"Returned status: {response.status_code} for content type: {content_type}"
@ -243,7 +243,7 @@ def test_regenerate_with_github_fails_without_pat(client):
files = get_sample_files_data()
# Act
response = client.post(f"/api/v1/index/update?t=github", files=files, headers=headers)
response = client.patch(f"/api/content?t=github", files=files, headers=headers)
# Assert
assert response.status_code == 200, f"Returned status: {response.status_code} for content type: github"

View file

@ -29,7 +29,7 @@ def test_index_update_with_user2(client, api_user2: KhojApiUser):
source_file_symbol = set([f[1][0] for f in files])
headers = {"Authorization": f"Bearer {api_user2.token}"}
update_response = client.post("/api/v1/index/update", files=files, headers=headers)
update_response = client.patch("/api/content", files=files, headers=headers)
search_response = client.get("/api/search?q=hardware&t=all", headers=headers)
results = search_response.json()
@ -47,7 +47,7 @@ def test_index_update_with_user2_inaccessible_user1(client, api_user2: KhojApiUs
source_file_symbol = set([f[1][0] for f in files])
headers = {"Authorization": f"Bearer {api_user2.token}"}
update_response = client.post("/api/v1/index/update", files=files, headers=headers)
update_response = client.patch("/api/content", files=files, headers=headers)
# Act
headers = {"Authorization": f"Bearer {api_user.token}"}