diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
index 6dd1cdf50..372c79a7c 100644
--- a/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
+++ b/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
@@ -71,21 +71,6 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
       return false;
     }
 
-    // TODO: Delete this snippet once we have streaming stable.
-    // const chatResult = await Workspace.sendChat(
-    //   workspace,
-    //   promptMessage.userMessage,
-    //   window.localStorage.getItem(`workspace_chat_mode_${workspace.slug}`) ??
-    //     "chat",
-    // )
-    // handleChat(
-    //   chatResult,
-    //   setLoadingResponse,
-    //   setChatHistory,
-    //   remHistory,
-    //   _chatHistory
-    // )
-
     await Workspace.streamChat(
       workspace,
       promptMessage.userMessage,
diff --git a/frontend/src/models/workspace.js b/frontend/src/models/workspace.js
index aa0b9f744..d015918d4 100644
--- a/frontend/src/models/workspace.js
+++ b/frontend/src/models/workspace.js
@@ -168,22 +168,6 @@ const Workspace = {
     const data = await response.json();
     return { response, data };
   },
-
-  // TODO: Deprecated and should be removed from frontend.
-  sendChat: async function ({ slug }, message, mode = "query") {
-    const chatResult = await fetch(`${API_BASE}/workspace/${slug}/chat`, {
-      method: "POST",
-      body: JSON.stringify({ message, mode }),
-      headers: baseHeaders(),
-    })
-      .then((res) => res.json())
-      .catch((e) => {
-        console.error(e);
-        return null;
-      });
-
-    return chatResult;
-  },
 };
 
 export default Workspace;
diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js
index 032fe41c3..ffead3adb 100644
--- a/server/endpoints/api/workspace/index.js
+++ b/server/endpoints/api/workspace/index.js
@@ -196,10 +196,11 @@ function apiWorkspaceEndpoints(app) {
           return;
         }
 
-        await WorkspaceChats.delete({ workspaceId: Number(workspace.id) });
-        await DocumentVectors.deleteForWorkspace(Number(workspace.id));
-        await Document.delete({ workspaceId: Number(workspace.id) });
-        await Workspace.delete({ id: Number(workspace.id) });
+        const workspaceId = Number(workspace.id);
+        await WorkspaceChats.delete({ workspaceId: workspaceId });
+        await DocumentVectors.deleteForWorkspace(workspaceId);
+        await Document.delete({ workspaceId: workspaceId });
+        await Workspace.delete({ id: workspaceId });
         try {
           await VectorDb["delete-namespace"]({ namespace: slug });
         } catch (e) {
@@ -441,7 +442,7 @@ function apiWorkspaceEndpoints(app) {
      #swagger.tags = ['Workspaces']
      #swagger.description = 'Execute a chat with a workspace'
      #swagger.requestBody = {
-       description: 'prompt to send to the workspace and the type of conversation (query or chat).',
+       description: 'Send a prompt to the workspace and the type of conversation (query or chat).<br/><b>Query:</b> Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.<br/><b>Chat:</b> Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.',
        required: true,
        type: 'object',
        content: {
diff --git a/server/endpoints/chat.js b/server/endpoints/chat.js
index d0a2923c5..79fc10132 100644
--- a/server/endpoints/chat.js
+++ b/server/endpoints/chat.js
@@ -1,7 +1,6 @@
 const { v4: uuidv4 } = require("uuid");
 const { reqBody, userFromSession, multiUserMode } = require("../utils/http");
 const { Workspace } = require("../models/workspace");
-const { chatWithWorkspace } = require("../utils/chats");
 const { validatedRequest } = require("../utils/middleware/validatedRequest");
 const { WorkspaceChats } = require("../models/workspaceChats");
 const { SystemSettings } = require("../models/systemSettings");
@@ -95,85 +94,6 @@ function chatEndpoints(app) {
       }
     }
   );
-
-  app.post(
-    "/workspace/:slug/chat",
-    [validatedRequest],
-    async (request, response) => {
-      try {
-        const user = await userFromSession(request, response);
-        const { slug } = request.params;
-        const { message, mode = "query" } = reqBody(request);
-
-        const workspace = multiUserMode(response)
-          ? await Workspace.getWithUser(user, { slug })
-          : await Workspace.get({ slug });
-
-        if (!workspace) {
-          response.sendStatus(400).end();
-          return;
-        }
-
-        if (multiUserMode(response) && user.role !== "admin") {
-          const limitMessagesSetting = await SystemSettings.get({
-            label: "limit_user_messages",
-          });
-          const limitMessages = limitMessagesSetting?.value === "true";
-
-          if (limitMessages) {
-            const messageLimitSetting = await SystemSettings.get({
-              label: "message_limit",
-            });
-            const systemLimit = Number(messageLimitSetting?.value);
-
-            if (!!systemLimit) {
-              const currentChatCount = await WorkspaceChats.count({
-                user_id: user.id,
-                createdAt: {
-                  gte: new Date(new Date() - 24 * 60 * 60 * 1000),
-                },
-              });
-
-              if (currentChatCount >= systemLimit) {
-                response.status(500).json({
-                  id: uuidv4(),
-                  type: "abort",
-                  textResponse: null,
-                  sources: [],
-                  close: true,
-                  error: `You have met your maximum 24 hour chat quota of ${systemLimit} chats set by the instance administrators. Try again later.`,
-                });
-                return;
-              }
-            }
-          }
-        }
-
-        const result = await chatWithWorkspace(workspace, message, mode, user);
-        await Telemetry.sendTelemetry(
-          "sent_chat",
-          {
-            multiUserMode: multiUserMode(response),
-            LLMSelection: process.env.LLM_PROVIDER || "openai",
-            Embedder: process.env.EMBEDDING_ENGINE || "inherit",
-            VectorDbSelection: process.env.VECTOR_DB || "pinecone",
-          },
-          user?.id
-        );
-        response.status(200).json({ ...result });
-      } catch (e) {
-        console.error(e);
-        response.status(500).json({
-          id: uuidv4(),
-          type: "abort",
-          textResponse: null,
-          sources: [],
-          close: true,
-          error: e.message,
-        });
-      }
-    }
-  );
 }
 
 module.exports = { chatEndpoints };
diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json
index 184723ed7..7b675c44b 100644
--- a/server/swagger/openapi.json
+++ b/server/swagger/openapi.json
@@ -1598,7 +1598,7 @@
           }
         },
         "requestBody": {
-          "description": "prompt to send to the workspace and the type of conversation (query or chat).",
+          "description": "Send a prompt to the workspace and the type of conversation (query or chat).<br/><b>Query:</b> Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.<br/><b>Chat:</b> Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.",
          "required": true,
          "type": "object",
          "content": {
diff --git a/server/utils/chats/index.js b/server/utils/chats/index.js
index 7e9be6e5b..7fdb47344 100644
--- a/server/utils/chats/index.js
+++ b/server/utils/chats/index.js
@@ -91,6 +91,18 @@ async function chatWithWorkspace(
   const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
   const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
   if (!hasVectorizedSpace || embeddingsCount === 0) {
+    if (chatMode === "query") {
+      return {
+        id: uuid,
+        type: "textResponse",
+        sources: [],
+        close: true,
+        error: null,
+        textResponse:
+          "There is no relevant information in this workspace to answer your query.",
+      };
+    }
+
     // If there are no embeddings - chat like a normal LLM chat interface.
     return await emptyEmbeddingChat({
       uuid,
@@ -131,6 +143,20 @@ async function chatWithWorkspace(
     };
   }
 
+  // If in query mode and no sources are found, do not
+  // let the LLM try to hallucinate a response or use general knowledge
+  if (chatMode === "query" && sources.length === 0) {
+    return {
+      id: uuid,
+      type: "textResponse",
+      sources: [],
+      close: true,
+      error: null,
+      textResponse:
+        "There is no relevant information in this workspace to answer your query.",
+    };
+  }
+
   // Compress message to ensure prompt passes token limit with room for response
   // and build system messages based on inputs and history.
   const messages = await LLMConnector.compressMessages(
diff --git a/server/utils/chats/stream.js b/server/utils/chats/stream.js
index 73437eec5..11d4effd7 100644
--- a/server/utils/chats/stream.js
+++ b/server/utils/chats/stream.js
@@ -50,6 +50,19 @@ async function streamChatWithWorkspace(
   const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
   const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
   if (!hasVectorizedSpace || embeddingsCount === 0) {
+    if (chatMode === "query") {
+      writeResponseChunk(response, {
+        id: uuid,
+        type: "textResponse",
+        textResponse:
+          "There is no relevant information in this workspace to answer your query.",
+        sources: [],
+        close: true,
+        error: null,
+      });
+      return;
+    }
+
     // If there are no embeddings - chat like a normal LLM chat interface.
     return await streamEmptyEmbeddingChat({
       response,
@@ -93,6 +106,21 @@ async function streamChatWithWorkspace(
     return;
   }
 
+  // If in query mode and no sources are found, do not
+  // let the LLM try to hallucinate a response or use general knowledge
+  if (chatMode === "query" && sources.length === 0) {
+    writeResponseChunk(response, {
+      id: uuid,
+      type: "textResponse",
+      textResponse:
+        "There is no relevant information in this workspace to answer your query.",
+      sources: [],
+      close: true,
+      error: null,
+    });
+    return;
+  }
+
  // Compress message to ensure prompt passes token limit with room for response
  // and build system messages based on inputs and history.
  const messages = await LLMConnector.compressMessages(
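
Note (not part of the patch): both chatWithWorkspace and streamChatWithWorkspace now apply the same query-mode guard, once when the workspace has no embeddings at all and once when the similarity search returns no sources. A minimal sketch of that guard pulled out into a standalone helper for illustration, assuming the chunk shape shown in the hunks above; queryModeShortCircuit is a hypothetical name and does not exist in the codebase.

// Sketch only: mirrors the guard added in server/utils/chats/index.js and
// server/utils/chats/stream.js. Returns the canned "no relevant information"
// payload when query mode has nothing to ground an answer on, or null so the
// caller can continue with the normal chat / streaming flow.
function queryModeShortCircuit({ uuid, chatMode, sources = [] }) {
  if (chatMode !== "query" || sources.length > 0) return null;
  return {
    id: uuid,
    type: "textResponse",
    sources: [],
    close: true,
    error: null,
    textResponse:
      "There is no relevant information in this workspace to answer your query.",
  };
}

// Example usage inside a handler (hypothetical):
// const early = queryModeShortCircuit({ uuid, chatMode, sources });
// if (early) return early;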