mirror of
https://github.com/khoj-ai/khoj.git
synced 2025-02-17 08:04:21 +00:00
Add additional telemetry for system understanding (#316)
* Add additional telemetry in order to understand which data sources are the most useful * Make actions side by side in the configuration page * Restore main run command * Update links to point to wiki pages for Github, Notion integrations * Standardize nomenclature of the api_type to use _config suffix * Remove header fields that aren't actually helpful for understanding config usage
This commit is contained in:
parent
c2249eadb2
commit
37f7f9fd1d
6 changed files with 195 additions and 60 deletions
|
@ -51,6 +51,10 @@
|
|||
body.khoj-configure {
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
div.section {
|
||||
padding: 12px;
|
||||
}
|
||||
}
|
||||
|
||||
img.khoj-logo {
|
||||
|
@ -69,6 +73,11 @@
|
|||
display: grid;
|
||||
justify-self: center;
|
||||
}
|
||||
|
||||
div.instructions {
|
||||
font-size: large;
|
||||
}
|
||||
|
||||
.section-title {
|
||||
margin: 0;
|
||||
padding: 0 0 16px 0;
|
||||
|
@ -162,6 +171,11 @@
|
|||
max-width: 16px;
|
||||
}
|
||||
|
||||
div.finalize-actions {
|
||||
grid-auto-flow: column;
|
||||
grid-gap: 24px;
|
||||
}
|
||||
|
||||
@media screen and (max-width: 600px) {
|
||||
.section-cards {
|
||||
grid-template-columns: 1fr;
|
||||
|
|
|
@ -204,6 +204,8 @@
|
|||
<input type="range" id="results-count-slider" name="results-count-slider" min="1" max="10" step="1" value="5">
|
||||
</div>
|
||||
<div id="status" style="display: none;"></div>
|
||||
</div>
|
||||
<div class="section finalize-actions">
|
||||
<button id="configure" type="submit" title="Update index with the latest changes">⚙️ Configure</button>
|
||||
<button id="reinitialize" type="submit" title="Regenerate index from scratch">🔄 Reinitialize</button>
|
||||
</div>
|
||||
|
|
|
@ -5,6 +5,9 @@
|
|||
<h2 class="section-title">
|
||||
<img class="card-icon" src="/static/assets/icons/github.svg" alt="Github">
|
||||
<span class="card-title-text">Github</span>
|
||||
<div class="instructions">
|
||||
<a href="https://github.com/khoj-ai/khoj/wiki/Setup-Github-integration">ⓘ Help</a>
|
||||
</div>
|
||||
</h2>
|
||||
<form>
|
||||
<table>
|
||||
|
|
|
@ -5,6 +5,9 @@
|
|||
<h2 class="section-title">
|
||||
<img class="card-icon" src="/static/assets/icons/notion.svg" alt="Notion">
|
||||
<span class="card-title-text">Notion</span>
|
||||
<div class="instructions">
|
||||
<a href="https://github.com/khoj-ai/khoj/wiki/Setup-Notion-Integration">ⓘ Help</a>
|
||||
</div>
|
||||
</h2>
|
||||
<form>
|
||||
<table>
|
||||
|
|
|
@ -34,7 +34,7 @@ from khoj.utils.state import SearchType
|
|||
from khoj.utils import state, constants
|
||||
from khoj.utils.yaml import save_config_to_file_updated_state
|
||||
from fastapi.responses import StreamingResponse, Response
|
||||
from khoj.routers.helpers import perform_chat_checks, generate_chat_response
|
||||
from khoj.routers.helpers import perform_chat_checks, generate_chat_response, update_telemetry_state
|
||||
from khoj.processor.conversation.gpt import extract_questions
|
||||
from fastapi.requests import Request
|
||||
|
||||
|
@ -56,15 +56,44 @@ if not state.demo:
|
|||
return state.config
|
||||
|
||||
@api.post("/config/data")
|
||||
async def set_config_data(updated_config: FullConfig):
|
||||
async def set_config_data(
|
||||
request: Request,
|
||||
updated_config: FullConfig,
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
state.config = updated_config
|
||||
with open(state.config_file, "w") as outfile:
|
||||
yaml.dump(yaml.safe_load(state.config.json(by_alias=True)), outfile)
|
||||
outfile.close()
|
||||
|
||||
configuration_update_metadata = dict()
|
||||
|
||||
if state.config.content_type is not None:
|
||||
configuration_update_metadata["github"] = state.config.content_type.github is not None
|
||||
configuration_update_metadata["notion"] = state.config.content_type.notion is not None
|
||||
configuration_update_metadata["org"] = state.config.content_type.org is not None
|
||||
configuration_update_metadata["pdf"] = state.config.content_type.pdf is not None
|
||||
configuration_update_metadata["markdown"] = state.config.content_type.markdown is not None
|
||||
configuration_update_metadata["plugins"] = state.config.content_type.plugins is not None
|
||||
|
||||
if state.config.processor is not None:
|
||||
configuration_update_metadata["conversation_processor"] = state.config.processor.conversation is not None
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_config",
|
||||
client=client,
|
||||
metadata=configuration_update_metadata,
|
||||
)
|
||||
return state.config
|
||||
|
||||
@api.post("/config/data/content_type/github", status_code=200)
|
||||
async def set_content_config_github_data(updated_config: Union[GithubContentConfig, None]):
|
||||
async def set_content_config_github_data(
|
||||
request: Request,
|
||||
updated_config: Union[GithubContentConfig, None],
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
_initialize_config()
|
||||
|
||||
if not state.config.content_type:
|
||||
|
@ -72,6 +101,14 @@ if not state.demo:
|
|||
else:
|
||||
state.config.content_type.github = updated_config
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_content_config",
|
||||
client=client,
|
||||
metadata={"content_type": "github"},
|
||||
)
|
||||
|
||||
try:
|
||||
save_config_to_file_updated_state()
|
||||
return {"status": "ok"}
|
||||
|
@ -79,7 +116,11 @@ if not state.demo:
|
|||
return {"status": "error", "message": str(e)}
|
||||
|
||||
@api.post("/config/data/content_type/notion", status_code=200)
|
||||
async def set_content_config_notion_data(updated_config: Union[NotionContentConfig, None]):
|
||||
async def set_content_config_notion_data(
|
||||
request: Request,
|
||||
updated_config: Union[NotionContentConfig, None],
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
_initialize_config()
|
||||
|
||||
if not state.config.content_type:
|
||||
|
@ -87,6 +128,14 @@ if not state.demo:
|
|||
else:
|
||||
state.config.content_type.notion = updated_config
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_content_config",
|
||||
client=client,
|
||||
metadata={"content_type": "notion"},
|
||||
)
|
||||
|
||||
try:
|
||||
save_config_to_file_updated_state()
|
||||
return {"status": "ok"}
|
||||
|
@ -94,10 +143,22 @@ if not state.demo:
|
|||
return {"status": "error", "message": str(e)}
|
||||
|
||||
@api.post("/delete/config/data/content_type/{content_type}", status_code=200)
|
||||
async def remove_content_config_data(content_type: str):
|
||||
async def remove_content_config_data(
|
||||
request: Request,
|
||||
content_type: str,
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
if not state.config or not state.config.content_type:
|
||||
return {"status": "ok"}
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="delete_content_config",
|
||||
client=client,
|
||||
metadata={"content_type": content_type},
|
||||
)
|
||||
|
||||
if state.config.content_type:
|
||||
state.config.content_type[content_type] = None
|
||||
|
||||
|
@ -121,12 +182,23 @@ if not state.demo:
|
|||
return {"status": "error", "message": str(e)}
|
||||
|
||||
@api.post("/delete/config/data/processor/conversation", status_code=200)
|
||||
async def remove_processor_conversation_config_data():
|
||||
async def remove_processor_conversation_config_data(
|
||||
request: Request,
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
if not state.config or not state.config.processor or not state.config.processor.conversation:
|
||||
return {"status": "ok"}
|
||||
|
||||
state.config.processor.conversation = None
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="delete_processor_config",
|
||||
client=client,
|
||||
metadata={"processor_type": "conversation"},
|
||||
)
|
||||
|
||||
try:
|
||||
save_config_to_file_updated_state()
|
||||
return {"status": "ok"}
|
||||
|
@ -134,7 +206,12 @@ if not state.demo:
|
|||
return {"status": "error", "message": str(e)}
|
||||
|
||||
@api.post("/config/data/content_type/{content_type}", status_code=200)
|
||||
async def set_content_config_data(content_type: str, updated_config: Union[TextContentConfig, None]):
|
||||
async def set_content_config_data(
|
||||
request: Request,
|
||||
content_type: str,
|
||||
updated_config: Union[TextContentConfig, None],
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
_initialize_config()
|
||||
|
||||
if not state.config.content_type:
|
||||
|
@ -142,6 +219,14 @@ if not state.demo:
|
|||
else:
|
||||
state.config.content_type[content_type] = updated_config
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_content_config",
|
||||
client=client,
|
||||
metadata={"content_type": content_type},
|
||||
)
|
||||
|
||||
try:
|
||||
save_config_to_file_updated_state()
|
||||
return {"status": "ok"}
|
||||
|
@ -149,11 +234,24 @@ if not state.demo:
|
|||
return {"status": "error", "message": str(e)}
|
||||
|
||||
@api.post("/config/data/processor/conversation", status_code=200)
|
||||
async def set_processor_conversation_config_data(updated_config: Union[ConversationProcessorConfig, None]):
|
||||
async def set_processor_conversation_config_data(
|
||||
request: Request,
|
||||
updated_config: Union[ConversationProcessorConfig, None],
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
_initialize_config()
|
||||
|
||||
state.config.processor = ProcessorConfig(conversation=updated_config)
|
||||
state.processor_config = configure_processor(state.config.processor)
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_content_config",
|
||||
client=client,
|
||||
metadata={"processor_type": "conversation"},
|
||||
)
|
||||
|
||||
try:
|
||||
save_config_to_file_updated_state()
|
||||
return {"status": "ok"}
|
||||
|
@ -369,20 +467,16 @@ async def search(
|
|||
# Cache results
|
||||
state.query_cache[query_cache_key] = results
|
||||
|
||||
user_state = {
|
||||
"client_host": request.client.host if request.client else "unknown",
|
||||
"user_agent": user_agent or "unknown",
|
||||
"referer": referer or "unknown",
|
||||
"host": host or "unknown",
|
||||
}
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="search",
|
||||
client=client,
|
||||
user_agent=user_agent,
|
||||
referer=referer,
|
||||
host=host,
|
||||
)
|
||||
|
||||
# Only log telemetry if query is new and not a continuation of previous query
|
||||
if state.previous_query is None or state.previous_query not in user_query:
|
||||
state.telemetry += [
|
||||
log_telemetry(
|
||||
telemetry_type="api", api="search", client=client, app_config=state.config.app, properties=user_state
|
||||
)
|
||||
]
|
||||
state.previous_query = user_query
|
||||
|
||||
end_time = time.time()
|
||||
|
@ -425,18 +519,15 @@ def update(
|
|||
else:
|
||||
logger.info("📬 Processor reconfigured via API")
|
||||
|
||||
user_state = {
|
||||
"client_host": request.client.host if request.client else None,
|
||||
"user_agent": user_agent or "unknown",
|
||||
"referer": referer or "unknown",
|
||||
"host": host or "unknown",
|
||||
}
|
||||
|
||||
state.telemetry += [
|
||||
log_telemetry(
|
||||
telemetry_type="api", api="update", client=client, app_config=state.config.app, properties=user_state
|
||||
)
|
||||
]
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="update",
|
||||
client=client,
|
||||
user_agent=user_agent,
|
||||
referer=referer,
|
||||
host=host,
|
||||
)
|
||||
|
||||
return {"status": "ok", "message": "khoj reloaded"}
|
||||
|
||||
|
@ -454,18 +545,15 @@ def chat_history(
|
|||
# Load Conversation History
|
||||
meta_log = state.processor_config.conversation.meta_log
|
||||
|
||||
user_state = {
|
||||
"client_host": request.client.host if request.client else None,
|
||||
"user_agent": user_agent or "unknown",
|
||||
"referer": referer or "unknown",
|
||||
"host": host or "unknown",
|
||||
}
|
||||
|
||||
state.telemetry += [
|
||||
log_telemetry(
|
||||
telemetry_type="api", api="chat", client=client, app_config=state.config.app, properties=user_state
|
||||
)
|
||||
]
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="chat",
|
||||
client=client,
|
||||
user_agent=user_agent,
|
||||
referer=referer,
|
||||
host=host,
|
||||
)
|
||||
|
||||
return {"status": "ok", "response": meta_log.get("chat", [])}
|
||||
|
||||
|
@ -509,18 +597,15 @@ async def chat(
|
|||
|
||||
response_obj = {"response": actual_response, "context": compiled_references}
|
||||
|
||||
user_state = {
|
||||
"client_host": request.client.host if request.client else None,
|
||||
"user_agent": user_agent or "unknown",
|
||||
"referer": referer or "unknown",
|
||||
"host": host or "unknown",
|
||||
}
|
||||
|
||||
state.telemetry += [
|
||||
log_telemetry(
|
||||
telemetry_type="api", api="chat", client=client, app_config=state.config.app, properties=user_state
|
||||
)
|
||||
]
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="chat",
|
||||
client=client,
|
||||
user_agent=user_agent,
|
||||
referer=referer,
|
||||
host=host,
|
||||
)
|
||||
|
||||
return Response(content=json.dumps(response_obj), media_type="application/json", status_code=200)
|
||||
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
from fastapi import HTTPException
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
|
||||
from fastapi import HTTPException, Request
|
||||
|
||||
from khoj.utils import state
|
||||
from khoj.utils.helpers import timer
|
||||
from khoj.utils.helpers import timer, log_telemetry
|
||||
from khoj.processor.conversation.gpt import converse
|
||||
from khoj.processor.conversation.utils import message_to_log, reciprocal_conversation_to_chatml
|
||||
|
||||
|
@ -24,6 +25,33 @@ def perform_chat_checks():
|
|||
)
|
||||
|
||||
|
||||
def update_telemetry_state(
    request: Request,
    telemetry_type: str,
    api: str,
    client: Optional[str] = None,
    user_agent: Optional[str] = None,
    referer: Optional[str] = None,
    host: Optional[str] = None,
    metadata: Optional[dict] = None,
):
    """Queue one telemetry entry describing an API call on ``state.telemetry``.

    Args:
        request: Incoming request; only ``request.client`` is read, to get the
            caller's host.
        telemetry_type: Forwarded unchanged to ``log_telemetry``.
        api: Name of the API being recorded; forwarded to ``log_telemetry``.
        client: Optional client identifier; forwarded to ``log_telemetry``.
        user_agent: Recorded under ``"user_agent"``; ``"unknown"`` when falsy.
        referer: Recorded under ``"referer"``; ``"unknown"`` when falsy.
        host: Recorded under ``"host"``; ``"unknown"`` when falsy.
        metadata: Optional extra properties merged on top of the request
            properties. Keys that collide with the ones above replace them.
    """
    requester = request.client

    # Unlike the header-derived fields, a missing client address is recorded
    # as None rather than "unknown".
    properties = dict(
        client_host=requester.host if requester else None,
        user_agent=user_agent or "unknown",
        referer=referer or "unknown",
        host=host or "unknown",
    )

    # A falsy metadata value (None, empty dict) contributes nothing.
    properties.update(metadata or {})

    state.telemetry += [
        log_telemetry(
            telemetry_type=telemetry_type,
            api=api,
            client=client,
            app_config=state.config.app,
            properties=properties,
        )
    ]
|
||||
|
||||
|
||||
def generate_chat_response(
|
||||
q: str,
|
||||
meta_log: dict,
|
||||
|
|
Loading…
Add table
Reference in a new issue