Pass file path of each doc reference in references returned by API

- Pass file path of reference along with the compiled reference in
  list of references returned by chat API converse functions
- Update the structure of references from list of strings to list of
  dictionary (containing 'compiled' and 'file' keys)
- Pull out the compiled reference from the new references data struct
  wherever it is being used
This commit is contained in:
Debanjum Singh Solanky 2024-05-26 17:18:38 +05:30
parent ba330712f8
commit e24ca9ec28
9 changed files with 24 additions and 15 deletions

View file

@ -62,7 +62,10 @@
return `${time_string}, ${date_string}`;
}
function generateReference(reference, index) {
function generateReference(referenceJson, index) {
let reference = referenceJson.hasOwnProperty("compiled") ? referenceJson.compiled : referenceJson;
let referenceFile = referenceJson.hasOwnProperty("file") ? referenceJson.file : null;
// Escape reference for HTML rendering
let escaped_ref = reference.replaceAll('"', '&quot;');

View file

@ -247,7 +247,8 @@ export class KhojChatView extends KhojPaneView {
return referenceButton;
}
generateReference(messageEl: Element, reference: string, index: number) {
generateReference(messageEl: Element, referenceJson: any, index: number) {
let reference: string = referenceJson.hasOwnProperty("compiled") ? referenceJson.compiled : referenceJson;
// Escape reference for HTML rendering
let escaped_ref = reference.replace(/"/g, "&quot;")

View file

@ -103,7 +103,10 @@ To get started, just start typing below. You can also type / to see a list of co
return `${time_string}, ${date_string}`;
}
function generateReference(reference, index) {
function generateReference(referenceJson, index) {
let reference = referenceJson.hasOwnProperty("compiled") ? referenceJson.compiled : referenceJson;
let referenceFile = referenceJson.hasOwnProperty("file") ? referenceJson.file : null;
// Escape reference for HTML rendering
let escaped_ref = reference.replaceAll('"', '&quot;');

View file

@ -142,7 +142,7 @@ def converse_offline(
# Initialize Variables
assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
compiled_references_message = "\n\n".join({f"{item}" for item in references})
compiled_references_message = "\n\n".join({f"{item['compiled']}" for item in references})
current_date = datetime.now().strftime("%Y-%m-%d")

View file

@ -127,7 +127,7 @@ def converse(
"""
# Initialize Variables
current_date = datetime.now().strftime("%Y-%m-%d")
compiled_references = "\n\n".join({f"# {item}" for item in references})
compiled_references = "\n\n".join({f"# {item['compiled']}" for item in references})
conversation_primer = prompts.query_prompt.format(query=user_query)

View file

@ -96,7 +96,7 @@ def save_to_conversation_log(
user: KhojUser,
meta_log: Dict,
user_message_time: str = None,
compiled_references: List[str] = [],
compiled_references: List[Dict[str, Any]] = [],
online_results: Dict[str, Any] = {},
inferred_queries: List[str] = [],
intent_type: str = "remember",

View file

@ -342,14 +342,14 @@ async def extract_references_and_questions(
# Collate search results as context for GPT
with timer("Searching knowledge base took", logger):
result_list = []
search_results = []
logger.info(f"🔍 Searching knowledge base with queries: {inferred_queries}")
if send_status_func:
inferred_queries_str = "\n- " + "\n- ".join(inferred_queries)
await send_status_func(f"**🔍 Searching Documents for:** {inferred_queries_str}")
for query in inferred_queries:
n_items = min(n, 3) if using_offline_chat else n
result_list.extend(
search_results.extend(
await execute_search(
user,
f"{query} {filters_in_query}",
@ -360,8 +360,10 @@ async def extract_references_and_questions(
dedupe=False,
)
)
result_list = text_search.deduplicated_search_responses(result_list)
compiled_references = [item.additional["compiled"] for item in result_list]
search_results = text_search.deduplicated_search_responses(search_results)
compiled_references = [
{"compiled": item.additional["compiled"], "file": item.additional["file"]} for item in search_results
]
return compiled_references, inferred_queries, defiltered_query

View file

@ -434,7 +434,7 @@ async def websocket_endpoint(
if compiled_references:
headings = "\n- " + "\n- ".join(
set([" ".join(c.split("Path: ")[1:]).split("\n ")[0] for c in compiled_references])
set([" ".join(c.get("compiled", c).split("Path: ")[1:]).split("\n ")[0] for c in compiled_references])
)
await send_status_update(f"**📜 Found Relevant Notes**: {headings}")

View file

@ -400,7 +400,7 @@ async def generate_better_image_prompt(
q: str,
conversation_history: str,
location_data: LocationData,
note_references: List[str],
note_references: List[Dict[str, Any]],
online_results: Optional[dict] = None,
) -> str:
"""
@ -415,7 +415,7 @@ async def generate_better_image_prompt(
else:
location_prompt = "Unknown"
user_references = "\n\n".join([f"# {item}" for item in note_references])
user_references = "\n\n".join([f"# {item['compiled']}" for item in note_references])
simplified_online_results = {}
@ -550,7 +550,7 @@ def generate_chat_response(
q: str,
meta_log: dict,
conversation: Conversation,
compiled_references: List[str] = [],
compiled_references: List[Dict] = [],
online_results: Dict[str, Dict] = {},
inferred_queries: List[str] = [],
conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
@ -634,7 +634,7 @@ async def text_to_image(
user: KhojUser,
conversation_log: dict,
location_data: LocationData,
references: List[str],
references: List[Dict[str, Any]],
online_results: Dict[str, Any],
send_status_func: Optional[Callable] = None,
) -> Tuple[Optional[str], int, Optional[str], str]: