Add additional telemetry for system understanding (#316)

* Add additional telemetry in order to understand which data sources are the most useful
* Make actions side by side in the configuration page
* Restore main run command
* Update links to point to the wiki pages for the GitHub and Notion integrations
* Standardize nomenclature of the api type to use the _config suffix

Remove header fields that aren't actually helpful for understanding config usage
This commit is contained in:
sabaimran 2023-07-14 10:14:07 -07:00 committed by GitHub
parent c2249eadb2
commit 37f7f9fd1d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 195 additions and 60 deletions

View file

@ -51,6 +51,10 @@
body.khoj-configure {
padding: 0;
}
div.section {
padding: 12px;
}
}
img.khoj-logo {
@ -69,6 +73,11 @@
display: grid;
justify-self: center;
}
div.instructions {
font-size: large;
}
.section-title {
margin: 0;
padding: 0 0 16px 0;
@ -162,6 +171,11 @@
max-width: 16px;
}
div.finalize-actions {
grid-auto-flow: column;
grid-gap: 24px;
}
@media screen and (max-width: 600px) {
.section-cards {
grid-template-columns: 1fr;

View file

@ -204,6 +204,8 @@
<input type="range" id="results-count-slider" name="results-count-slider" min="1" max="10" step="1" value="5">
</div>
<div id="status" style="display: none;"></div>
</div>
<div class="section finalize-actions">
<button id="configure" type="submit" title="Update index with the latest changes">⚙️ Configure</button>
<button id="reinitialize" type="submit" title="Regenerate index from scratch">🔄 Reinitialize</button>
</div>

View file

@ -5,6 +5,9 @@
<h2 class="section-title">
<img class="card-icon" src="/static/assets/icons/github.svg" alt="Github">
<span class="card-title-text">Github</span>
<div class="instructions">
<a href="https://github.com/khoj-ai/khoj/wiki/Setup-Github-integration">ⓘ Help</a>
</div>
</h2>
<form>
<table>

View file

@ -5,6 +5,9 @@
<h2 class="section-title">
<img class="card-icon" src="/static/assets/icons/notion.svg" alt="Notion">
<span class="card-title-text">Notion</span>
<div class="instructions">
<a href="https://github.com/khoj-ai/khoj/wiki/Setup-Notion-Integration">ⓘ Help</a>
</div>
</h2>
<form>
<table>

View file

@ -34,7 +34,7 @@ from khoj.utils.state import SearchType
from khoj.utils import state, constants
from khoj.utils.yaml import save_config_to_file_updated_state
from fastapi.responses import StreamingResponse, Response
from khoj.routers.helpers import perform_chat_checks, generate_chat_response
from khoj.routers.helpers import perform_chat_checks, generate_chat_response, update_telemetry_state
from khoj.processor.conversation.gpt import extract_questions
from fastapi.requests import Request
@ -56,15 +56,44 @@ if not state.demo:
return state.config
@api.post("/config/data")
async def set_config_data(updated_config: FullConfig):
async def set_config_data(
request: Request,
updated_config: FullConfig,
client: Optional[str] = None,
):
state.config = updated_config
with open(state.config_file, "w") as outfile:
yaml.dump(yaml.safe_load(state.config.json(by_alias=True)), outfile)
outfile.close()
configuration_update_metadata = dict()
if state.config.content_type is not None:
configuration_update_metadata["github"] = state.config.content_type.github is not None
configuration_update_metadata["notion"] = state.config.content_type.notion is not None
configuration_update_metadata["org"] = state.config.content_type.org is not None
configuration_update_metadata["pdf"] = state.config.content_type.pdf is not None
configuration_update_metadata["markdown"] = state.config.content_type.markdown is not None
configuration_update_metadata["plugins"] = state.config.content_type.plugins is not None
if state.config.processor is not None:
configuration_update_metadata["conversation_processor"] = state.config.processor.conversation is not None
update_telemetry_state(
request=request,
telemetry_type="api",
api="set_config",
client=client,
metadata=configuration_update_metadata,
)
return state.config
@api.post("/config/data/content_type/github", status_code=200)
async def set_content_config_github_data(updated_config: Union[GithubContentConfig, None]):
async def set_content_config_github_data(
request: Request,
updated_config: Union[GithubContentConfig, None],
client: Optional[str] = None,
):
_initialize_config()
if not state.config.content_type:
@ -72,6 +101,14 @@ if not state.demo:
else:
state.config.content_type.github = updated_config
update_telemetry_state(
request=request,
telemetry_type="api",
api="set_content_config",
client=client,
metadata={"content_type": "github"},
)
try:
save_config_to_file_updated_state()
return {"status": "ok"}
@ -79,7 +116,11 @@ if not state.demo:
return {"status": "error", "message": str(e)}
@api.post("/config/data/content_type/notion", status_code=200)
async def set_content_config_notion_data(updated_config: Union[NotionContentConfig, None]):
async def set_content_config_notion_data(
request: Request,
updated_config: Union[NotionContentConfig, None],
client: Optional[str] = None,
):
_initialize_config()
if not state.config.content_type:
@ -87,6 +128,14 @@ if not state.demo:
else:
state.config.content_type.notion = updated_config
update_telemetry_state(
request=request,
telemetry_type="api",
api="set_content_config",
client=client,
metadata={"content_type": "notion"},
)
try:
save_config_to_file_updated_state()
return {"status": "ok"}
@ -94,10 +143,22 @@ if not state.demo:
return {"status": "error", "message": str(e)}
@api.post("/delete/config/data/content_type/{content_type}", status_code=200)
async def remove_content_config_data(content_type: str):
async def remove_content_config_data(
request: Request,
content_type: str,
client: Optional[str] = None,
):
if not state.config or not state.config.content_type:
return {"status": "ok"}
update_telemetry_state(
request=request,
telemetry_type="api",
api="delete_content_config",
client=client,
metadata={"content_type": content_type},
)
if state.config.content_type:
state.config.content_type[content_type] = None
@ -121,12 +182,23 @@ if not state.demo:
return {"status": "error", "message": str(e)}
@api.post("/delete/config/data/processor/conversation", status_code=200)
async def remove_processor_conversation_config_data():
async def remove_processor_conversation_config_data(
request: Request,
client: Optional[str] = None,
):
if not state.config or not state.config.processor or not state.config.processor.conversation:
return {"status": "ok"}
state.config.processor.conversation = None
update_telemetry_state(
request=request,
telemetry_type="api",
api="delete_processor_config",
client=client,
metadata={"processor_type": "conversation"},
)
try:
save_config_to_file_updated_state()
return {"status": "ok"}
@ -134,7 +206,12 @@ if not state.demo:
return {"status": "error", "message": str(e)}
@api.post("/config/data/content_type/{content_type}", status_code=200)
async def set_content_config_data(content_type: str, updated_config: Union[TextContentConfig, None]):
async def set_content_config_data(
request: Request,
content_type: str,
updated_config: Union[TextContentConfig, None],
client: Optional[str] = None,
):
_initialize_config()
if not state.config.content_type:
@ -142,6 +219,14 @@ if not state.demo:
else:
state.config.content_type[content_type] = updated_config
update_telemetry_state(
request=request,
telemetry_type="api",
api="set_content_config",
client=client,
metadata={"content_type": content_type},
)
try:
save_config_to_file_updated_state()
return {"status": "ok"}
@ -149,11 +234,24 @@ if not state.demo:
return {"status": "error", "message": str(e)}
@api.post("/config/data/processor/conversation", status_code=200)
async def set_processor_conversation_config_data(updated_config: Union[ConversationProcessorConfig, None]):
async def set_processor_conversation_config_data(
request: Request,
updated_config: Union[ConversationProcessorConfig, None],
client: Optional[str] = None,
):
_initialize_config()
state.config.processor = ProcessorConfig(conversation=updated_config)
state.processor_config = configure_processor(state.config.processor)
update_telemetry_state(
request=request,
telemetry_type="api",
api="set_content_config",
client=client,
metadata={"processor_type": "conversation"},
)
try:
save_config_to_file_updated_state()
return {"status": "ok"}
@ -369,20 +467,16 @@ async def search(
# Cache results
state.query_cache[query_cache_key] = results
user_state = {
"client_host": request.client.host if request.client else "unknown",
"user_agent": user_agent or "unknown",
"referer": referer or "unknown",
"host": host or "unknown",
}
update_telemetry_state(
request=request,
telemetry_type="api",
api="search",
client=client,
user_agent=user_agent,
referer=referer,
host=host,
)
# Only log telemetry if query is new and not a continuation of previous query
if state.previous_query is None or state.previous_query not in user_query:
state.telemetry += [
log_telemetry(
telemetry_type="api", api="search", client=client, app_config=state.config.app, properties=user_state
)
]
state.previous_query = user_query
end_time = time.time()
@ -425,18 +519,15 @@ def update(
else:
logger.info("📬 Processor reconfigured via API")
user_state = {
"client_host": request.client.host if request.client else None,
"user_agent": user_agent or "unknown",
"referer": referer or "unknown",
"host": host or "unknown",
}
state.telemetry += [
log_telemetry(
telemetry_type="api", api="update", client=client, app_config=state.config.app, properties=user_state
)
]
update_telemetry_state(
request=request,
telemetry_type="api",
api="update",
client=client,
user_agent=user_agent,
referer=referer,
host=host,
)
return {"status": "ok", "message": "khoj reloaded"}
@ -454,18 +545,15 @@ def chat_history(
# Load Conversation History
meta_log = state.processor_config.conversation.meta_log
user_state = {
"client_host": request.client.host if request.client else None,
"user_agent": user_agent or "unknown",
"referer": referer or "unknown",
"host": host or "unknown",
}
state.telemetry += [
log_telemetry(
telemetry_type="api", api="chat", client=client, app_config=state.config.app, properties=user_state
)
]
update_telemetry_state(
request=request,
telemetry_type="api",
api="chat",
client=client,
user_agent=user_agent,
referer=referer,
host=host,
)
return {"status": "ok", "response": meta_log.get("chat", [])}
@ -509,18 +597,15 @@ async def chat(
response_obj = {"response": actual_response, "context": compiled_references}
user_state = {
"client_host": request.client.host if request.client else None,
"user_agent": user_agent or "unknown",
"referer": referer or "unknown",
"host": host or "unknown",
}
state.telemetry += [
log_telemetry(
telemetry_type="api", api="chat", client=client, app_config=state.config.app, properties=user_state
)
]
update_telemetry_state(
request=request,
telemetry_type="api",
api="chat",
client=client,
user_agent=user_agent,
referer=referer,
host=host,
)
return Response(content=json.dumps(response_obj), media_type="application/json", status_code=200)

View file

@ -1,11 +1,12 @@
from fastapi import HTTPException
import logging
from datetime import datetime
from functools import partial
from typing import List
from typing import List, Optional
from fastapi import HTTPException, Request
from khoj.utils import state
from khoj.utils.helpers import timer
from khoj.utils.helpers import timer, log_telemetry
from khoj.processor.conversation.gpt import converse
from khoj.processor.conversation.utils import message_to_log, reciprocal_conversation_to_chatml
@ -24,6 +25,33 @@ def perform_chat_checks():
)
def update_telemetry_state(
    request: Request,
    telemetry_type: str,
    api: str,
    client: Optional[str] = None,
    user_agent: Optional[str] = None,
    referer: Optional[str] = None,
    host: Optional[str] = None,
    metadata: Optional[dict] = None,
):
    """Build a telemetry entry for an API call and append it to ``state.telemetry``.

    Args:
        request: Incoming request; only ``request.client`` is read, to obtain
            the caller's host when a client is attached.
        telemetry_type: Forwarded unchanged to ``log_telemetry``.
        api: Name of the API being reported, forwarded to ``log_telemetry``.
        client: Optional client identifier, forwarded to ``log_telemetry``.
        user_agent: Optional user agent value; reported as ``"unknown"`` when falsy.
        referer: Optional referer value; reported as ``"unknown"`` when falsy.
        host: Optional host value; reported as ``"unknown"`` when falsy.
        metadata: Optional extra properties merged over the base properties,
            so a key that collides with a base key replaces it.

    Returns:
        None. The only effect is the entry appended to ``state.telemetry``.
    """
    # The client host is None (not "unknown") when the request carries no client.
    properties = dict(
        client_host=request.client.host if request.client else None,
        user_agent=user_agent or "unknown",
        referer=referer or "unknown",
        host=host or "unknown",
    )

    # An empty or missing metadata dict leaves the base properties untouched.
    if metadata:
        properties.update(metadata)

    telemetry_entry = log_telemetry(
        telemetry_type=telemetry_type,
        api=api,
        client=client,
        app_config=state.config.app,
        properties=properties,
    )
    state.telemetry += [telemetry_entry]
def generate_chat_response(
q: str,
meta_log: dict,