Mirror of https://github.com/Mintplex-Labs/anything-llm.git
Add backfilling on query for chat widget to improve UX (#2482)

This commit is contained in:
parent be6289d141
commit c3723ce2ff

3 changed files with 75 additions and 25 deletions
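At a high level, the diff below touches three areas: the embed /history endpoint now asks the EmbedChats model to strip sources before returning chats, the EmbedChats model gains a filterSources helper plus a filterSources flag on forEmbedByUser, and the embed streaming chat backfills the LLM context with sources cited earlier in the session while saving sources with each response. The following is only a rough, hedged sketch of the resulting query-mode flow; vectorSearch is a stand-in name, not a function from this commit.

// Rough sketch of the new embed query-mode flow (vectorSearch is a stand-in name).
async function sketchEmbedQuery({ embed, sessionId, message, pinnedDocIdentifiers = [] }) {
  // History is now loaded even in query mode, so past citations are available.
  const { rawHistory, chatHistory } = await recentEmbedChatHistory(sessionId, embed, 20);

  // Current search results are topped up ("backfilled") with sources from rawHistory.
  const vectorSearchResults = await vectorSearch(embed, message);
  const filled = fillSourceWindow({
    nDocs: embed.workspace?.topN || 4,
    searchResults: vectorSearchResults.sources,
    history: rawHistory,
    filterIdentifiers: pinnedDocIdentifiers,
  });

  // The LLM sees the backfilled context, but visible citations stay limited to
  // the current search, and the saved chat row now records `sources` as well.
  return {
    contextTexts: filled.contextTexts,
    sources: vectorSearchResults.sources,
    chatHistory,
  };
}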
server

@@ -56,6 +56,7 @@ function embeddedEndpoints(app) {
         writeResponseChunk(response, {
           id: uuidv4(),
           type: "abort",
+          sources: [],
           textResponse: null,
           close: true,
           error: e.message,
@@ -72,11 +73,15 @@ function embeddedEndpoints(app) {
       try {
         const { sessionId } = request.params;
         const embed = response.locals.embedConfig;
-        const history = await EmbedChats.forEmbedByUser(embed.id, sessionId);
-        response.status(200).json({ history: convertToChatHistory(history) });
+        const history = await EmbedChats.forEmbedByUser(
+          embed.id,
+          sessionId,
+          null,
+          null,
+          true
+        );
+        response.status(200).json({
+          history: convertToChatHistory(history),
+        });
       } catch (e) {
         console.error(e.message, e);
         response.sendStatus(500).end();
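For orientation, this is roughly what the widget-side consumer of that history route could look like. It is only a hedged usage sketch: the base URL and route shape are assumptions, since the route definition sits outside this hunk. Note that with this change the server stores sources alongside each response but strips them before they reach the widget.

// Hypothetical widget-side call; base URL and route shape are assumptions.
async function loadEmbedHistory(baseUrl, embedId, sessionId) {
  const res = await fetch(`${baseUrl}/embed/${embedId}/${sessionId}`);
  if (!res.ok) return [];
  const { history } = await res.json();
  // Entries are normalized by convertToChatHistory() on the server and no
  // longer include the private `sources` array thanks to filterSources.
  return history;
}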
@@ -1,5 +1,17 @@
+const { safeJsonParse } = require("../utils/http");
 const prisma = require("../utils/prisma");
 
+/**
+ * @typedef {Object} EmbedChat
+ * @property {number} id
+ * @property {number} embed_id
+ * @property {string} prompt
+ * @property {string} response
+ * @property {string} connection_information
+ * @property {string} session_id
+ * @property {boolean} include
+ */
+
 const EmbedChats = {
   new: async function ({
     embedId,
@@ -25,11 +37,36 @@ const EmbedChats = {
     }
   },
 
+  /**
+   * Loops through each chat and filters out the sources from the response object.
+   * We do this when returning /history of an embed to the frontend to prevent inadvertent leaking
+   * of private sources the user may not have intended to share with users.
+   * @param {EmbedChat[]} chats
+   * @returns {EmbedChat[]} Returns a new array of chats with the sources filtered out of responses
+   */
+  filterSources: function (chats) {
+    return chats.map((chat) => {
+      const { response, ...rest } = chat;
+      const { sources, ...responseRest } = safeJsonParse(response);
+      return { ...rest, response: JSON.stringify(responseRest) };
+    });
+  },
+
+  /**
+   * Fetches chats for a given embed and session id.
+   * @param {number} embedId the id of the embed to fetch chats for
+   * @param {string} sessionId the id of the session to fetch chats for
+   * @param {number|null} limit the maximum number of chats to fetch
+   * @param {string|null} orderBy the order to fetch chats in
+   * @param {boolean} filterSources whether to filter out the sources from the response (default: false)
+   * @returns {Promise<EmbedChat[]>} Returns an array of chats for the given embed and session
+   */
   forEmbedByUser: async function (
     embedId = null,
     sessionId = null,
     limit = null,
-    orderBy = null
+    orderBy = null,
+    filterSources = false
   ) {
     if (!embedId || !sessionId) return [];
@@ -43,7 +80,7 @@ const EmbedChats = {
         ...(limit !== null ? { take: limit } : {}),
         ...(orderBy !== null ? { orderBy } : { orderBy: { id: "asc" } }),
       });
-      return chats;
+      return filterSources ? this.filterSources(chats) : chats;
     } catch (error) {
       console.error(error.message);
       return [];
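To make the filtering concrete, here is a small before/after example of EmbedChats.filterSources on a single stored row; the row values are invented purely for illustration.

// Invented example row, shaped like the EmbedChat typedef above.
const { EmbedChats } = require("./models/embedChats"); // adjust path to the caller's location
const storedChat = {
  id: 12,
  embed_id: 3,
  session_id: "abc-123",
  prompt: "What are your support hours?",
  response: JSON.stringify({
    text: "We are available 9am to 5pm ET.",
    type: "query",
    sources: [{ title: "support-handbook.pdf" }], // private workspace document
  }),
  include: true,
};

const [cleaned] = EmbedChats.filterSources([storedChat]);
// cleaned.response === '{"text":"We are available 9am to 5pm ET.","type":"query"}'
// Only the `sources` key is dropped from the stored JSON; every other field
// on the row is returned untouched.

Passing true as the new fifth argument to forEmbedByUser runs exactly this transformation before the /history endpoint responds.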
@@ -60,8 +60,7 @@ async function streamChatWithForEmbed(
   const { rawHistory, chatHistory } = await recentEmbedChatHistory(
     sessionId,
     embed,
-    messageLimit,
-    chatMode
+    messageLimit
   );
 
   // See stream.js comment for more information on this implementation.
@@ -113,16 +112,27 @@ async function streamChatWithForEmbed(
     return;
   }
 
-  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
+  const { fillSourceWindow } = require("../helpers/chat");
+  const filledSources = fillSourceWindow({
+    nDocs: embed.workspace?.topN || 4,
+    searchResults: vectorSearchResults.sources,
+    history: rawHistory,
+    filterIdentifiers: pinnedDocIdentifiers,
+  });
+
+  // Why does contextTexts get all the info, but sources only get the current search?
+  // This gives the LLM the ability to "comprehend" a contextual response without
+  // populating the Citations under a response with documents the user "thinks" are irrelevant,
+  // given how we manage backfilling of the context to keep chats with the LLM more correct in responses.
+  // If a past citation was used to answer the question, that is visible in the history, so it logically makes sense
+  // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
+  // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keeping answers highly accurate.
+  contextTexts = [...contextTexts, ...filledSources.contextTexts];
   sources = [...sources, ...vectorSearchResults.sources];
 
-  // If in query mode and no sources are found, do not
+  // If in query mode and no sources are found in the current search or backfilled from history, do not
   // let the LLM try to hallucinate a response or use general knowledge
-  if (
-    chatMode === "query" &&
-    sources.length === 0 &&
-    pinnedDocIdentifiers.length === 0
-  ) {
+  if (chatMode === "query" && contextTexts.length === 0) {
     writeResponseChunk(response, {
       id: uuid,
       type: "textResponse",
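fillSourceWindow lives in ../helpers/chat and its implementation is not part of this diff, so the following is only a guess at the general idea implied by the call site: top the current search results up to nDocs with sources cited earlier in the session, skipping pinned documents and duplicates. Field names like title and text on source objects are assumptions.

// Hedged sketch of the idea behind fillSourceWindow; not the repo's actual helper.
function fillSourceWindowSketch({ nDocs = 4, searchResults = [], history = [], filterIdentifiers = [] }) {
  const window = [...searchResults];
  const seen = new Set(searchResults.map((doc) => doc.title)); // assumed identifier
  for (const chat of history) {
    if (window.length >= nDocs) break;
    // Stored embed responses are JSON strings that may carry a `sources` array.
    const { sources = [] } = JSON.parse(chat.response || "{}");
    for (const source of sources) {
      if (window.length >= nDocs) break;
      if (filterIdentifiers.includes(source.title) || seen.has(source.title)) continue;
      seen.add(source.title);
      window.push(source);
    }
  }
  // The caller in this hunk only reads contextTexts off the result.
  return { sources: window, contextTexts: window.map((doc) => doc.text || "") };
}

Under that reading, contextTexts.length === 0 in the new guard means the current search, the history backfill, and (presumably, since their context was accumulated earlier) any pinned documents all came up empty, which is why the older three-part condition could be collapsed.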
@@ -178,7 +188,7 @@ async function streamChatWithForEmbed(
   await EmbedChats.new({
     embedId: embed.id,
     prompt: message,
-    response: { text: completeText, type: chatMode },
+    response: { text: completeText, type: chatMode, sources },
     connection_information: response.locals.connection
       ? {
           ...response.locals.connection,
@@ -190,15 +200,13 @@
   return;
 }
 
-// On query we don't return message history. All other chat modes and when chatting
-// with no embeddings we return history.
-async function recentEmbedChatHistory(
-  sessionId,
-  embed,
-  messageLimit = 20,
-  chatMode = null
-) {
-  if (chatMode === "query") return { rawHistory: [], chatHistory: [] };
+/**
+ * @param {string} sessionId the session id of the user from the embed widget
+ * @param {Object} embed the embed config object
+ * @param {Number} messageLimit the number of messages to return
+ * @returns {Promise<{rawHistory: import("@prisma/client").embed_chats[], chatHistory: {role: string, content: string}[]}>}
+ */
+async function recentEmbedChatHistory(sessionId, embed, messageLimit = 20) {
   const rawHistory = (
     await EmbedChats.forEmbedByUser(embed.id, sessionId, messageLimit, {
       id: "desc",
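The hunk is cut off mid-call. For readability only: a fetch ordered by { id: "desc" } with a limit returns the newest rows first, and a caller building prompt history would typically reverse them back into chronological order afterwards. The .reverse() below is an assumption about the elided remainder, not text from this diff.

// Assumed shape of the truncated call above (the .reverse() is not shown in the hunk).
const rawHistory = (
  await EmbedChats.forEmbedByUser(embed.id, sessionId, messageLimit, {
    id: "desc",
  })
).reverse();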