diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
index 6dd1cdf50..372c79a7c 100644
--- a/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
+++ b/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
@@ -71,21 +71,6 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
       return false;
     }
 
-    // TODO: Delete this snippet once we have streaming stable.
-    // const chatResult = await Workspace.sendChat(
-    //   workspace,
-    //   promptMessage.userMessage,
-    //   window.localStorage.getItem(`workspace_chat_mode_${workspace.slug}`) ??
-    //     "chat",
-    // )
-    // handleChat(
-    //   chatResult,
-    //   setLoadingResponse,
-    //   setChatHistory,
-    //   remHistory,
-    //   _chatHistory
-    // )
-
     await Workspace.streamChat(
       workspace,
       promptMessage.userMessage,
diff --git a/frontend/src/models/workspace.js b/frontend/src/models/workspace.js
index aa0b9f744..d015918d4 100644
--- a/frontend/src/models/workspace.js
+++ b/frontend/src/models/workspace.js
@@ -168,22 +168,6 @@ const Workspace = {
     const data = await response.json();
     return { response, data };
   },
-
-  // TODO: Deprecated and should be removed from frontend.
-  sendChat: async function ({ slug }, message, mode = "query") {
-    const chatResult = await fetch(`${API_BASE}/workspace/${slug}/chat`, {
-      method: "POST",
-      body: JSON.stringify({ message, mode }),
-      headers: baseHeaders(),
-    })
-      .then((res) => res.json())
-      .catch((e) => {
-        console.error(e);
-        return null;
-      });
-
-    return chatResult;
-  },
 };
 
 export default Workspace;
diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js
index 032fe41c3..ffead3adb 100644
--- a/server/endpoints/api/workspace/index.js
+++ b/server/endpoints/api/workspace/index.js
@@ -196,10 +196,11 @@ function apiWorkspaceEndpoints(app) {
           return;
         }
 
-        await WorkspaceChats.delete({ workspaceId: Number(workspace.id) });
-        await DocumentVectors.deleteForWorkspace(Number(workspace.id));
-        await Document.delete({ workspaceId: Number(workspace.id) });
-        await Workspace.delete({ id: Number(workspace.id) });
+        const workspaceId = Number(workspace.id);
+        await WorkspaceChats.delete({ workspaceId: workspaceId });
+        await DocumentVectors.deleteForWorkspace(workspaceId);
+        await Document.delete({ workspaceId: workspaceId });
+        await Workspace.delete({ id: workspaceId });
         try {
           await VectorDb["delete-namespace"]({ namespace: slug });
         } catch (e) {
@@ -441,7 +442,7 @@ function apiWorkspaceEndpoints(app) {
      #swagger.tags = ['Workspaces']
      #swagger.description = 'Execute a chat with a workspace'
      #swagger.requestBody = {
-       description: 'prompt to send to the workspace and the type of conversation (query or chat).',
+       description: 'Send a prompt to the workspace and the type of conversation (query or chat).<br/><b>Query:</b> Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.<br/><b>Chat:</b> Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.',
        required: true,
        type: 'object',
        content: {
diff --git a/server/endpoints/chat.js b/server/endpoints/chat.js
index d0a2923c5..79fc10132 100644
--- a/server/endpoints/chat.js
+++ b/server/endpoints/chat.js
@@ -1,7 +1,6 @@
 const { v4: uuidv4 } = require("uuid");
 const { reqBody, userFromSession, multiUserMode } = require("../utils/http");
 const { Workspace } = require("../models/workspace");
-const { chatWithWorkspace } = require("../utils/chats");
 const { validatedRequest } = require("../utils/middleware/validatedRequest");
 const { WorkspaceChats } = require("../models/workspaceChats");
 const { SystemSettings } = require("../models/systemSettings");
@@ -95,85 +94,6 @@ function chatEndpoints(app) {
       }
     }
   );
-
-  app.post(
-    "/workspace/:slug/chat",
-    [validatedRequest],
-    async (request, response) => {
-      try {
-        const user = await userFromSession(request, response);
-        const { slug } = request.params;
-        const { message, mode = "query" } = reqBody(request);
-
-        const workspace = multiUserMode(response)
-          ? await Workspace.getWithUser(user, { slug })
-          : await Workspace.get({ slug });
-
-        if (!workspace) {
-          response.sendStatus(400).end();
-          return;
-        }
-
-        if (multiUserMode(response) && user.role !== "admin") {
-          const limitMessagesSetting = await SystemSettings.get({
-            label: "limit_user_messages",
-          });
-          const limitMessages = limitMessagesSetting?.value === "true";
-
-          if (limitMessages) {
-            const messageLimitSetting = await SystemSettings.get({
-              label: "message_limit",
-            });
-            const systemLimit = Number(messageLimitSetting?.value);
-
-            if (!!systemLimit) {
-              const currentChatCount = await WorkspaceChats.count({
-                user_id: user.id,
-                createdAt: {
-                  gte: new Date(new Date() - 24 * 60 * 60 * 1000),
-                },
-              });
-
-              if (currentChatCount >= systemLimit) {
-                response.status(500).json({
-                  id: uuidv4(),
-                  type: "abort",
-                  textResponse: null,
-                  sources: [],
-                  close: true,
-                  error: `You have met your maximum 24 hour chat quota of ${systemLimit} chats set by the instance administrators. Try again later.`,
-                });
-                return;
-              }
-            }
-          }
-        }
-
-        const result = await chatWithWorkspace(workspace, message, mode, user);
-        await Telemetry.sendTelemetry(
-          "sent_chat",
-          {
-            multiUserMode: multiUserMode(response),
-            LLMSelection: process.env.LLM_PROVIDER || "openai",
-            Embedder: process.env.EMBEDDING_ENGINE || "inherit",
-            VectorDbSelection: process.env.VECTOR_DB || "pinecone",
-          },
-          user?.id
-        );
-        response.status(200).json({ ...result });
-      } catch (e) {
-        console.error(e);
-        response.status(500).json({
-          id: uuidv4(),
-          type: "abort",
-          textResponse: null,
-          sources: [],
-          close: true,
-          error: e.message,
-        });
-      }
-    }
-  );
 }
 
 module.exports = { chatEndpoints };
diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json
index 184723ed7..7b675c44b 100644
--- a/server/swagger/openapi.json
+++ b/server/swagger/openapi.json
@@ -1598,7 +1598,7 @@
           }
         },
         "requestBody": {
-          "description": "prompt to send to the workspace and the type of conversation (query or chat).",
+          "description": "Send a prompt to the workspace and the type of conversation (query or chat).<br/><b>Query:</b> Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.<br/><b>Chat:</b> Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.",
          "required": true,
          "type": "object",
          "content": {
diff --git a/server/utils/chats/index.js b/server/utils/chats/index.js
index 7e9be6e5b..7fdb47344 100644
--- a/server/utils/chats/index.js
+++ b/server/utils/chats/index.js
@@ -91,6 +91,18 @@ async function chatWithWorkspace(
   const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
   const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
   if (!hasVectorizedSpace || embeddingsCount === 0) {
+    if (chatMode === "query") {
+      return {
+        id: uuid,
+        type: "textResponse",
+        sources: [],
+        close: true,
+        error: null,
+        textResponse:
+          "There is no relevant information in this workspace to answer your query.",
+      };
+    }
+
     // If there are no embeddings - chat like a normal LLM chat interface.
     return await emptyEmbeddingChat({
       uuid,
@@ -131,6 +143,20 @@ async function chatWithWorkspace(
     };
   }
 
+  // If in query mode and no sources are found, do not
+  // let the LLM try to hallucinate a response or use general knowledge
+  if (chatMode === "query" && sources.length === 0) {
+    return {
+      id: uuid,
+      type: "textResponse",
+      sources: [],
+      close: true,
+      error: null,
+      textResponse:
+        "There is no relevant information in this workspace to answer your query.",
+    };
+  }
+
   // Compress message to ensure prompt passes token limit with room for response
   // and build system messages based on inputs and history.
   const messages = await LLMConnector.compressMessages(
diff --git a/server/utils/chats/stream.js b/server/utils/chats/stream.js
index 73437eec5..11d4effd7 100644
--- a/server/utils/chats/stream.js
+++ b/server/utils/chats/stream.js
@@ -50,6 +50,19 @@ async function streamChatWithWorkspace(
   const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
   const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
   if (!hasVectorizedSpace || embeddingsCount === 0) {
+    if (chatMode === "query") {
+      writeResponseChunk(response, {
+        id: uuid,
+        type: "textResponse",
+        textResponse:
+          "There is no relevant information in this workspace to answer your query.",
+        sources: [],
+        close: true,
+        error: null,
+      });
+      return;
+    }
+
     // If there are no embeddings - chat like a normal LLM chat interface.
     return await streamEmptyEmbeddingChat({
       response,
@@ -93,6 +106,21 @@ async function streamChatWithWorkspace(
     return;
   }
 
+  // If in query mode and no sources are found, do not
+  // let the LLM try to hallucinate a response or use general knowledge
+  if (chatMode === "query" && sources.length === 0) {
+    writeResponseChunk(response, {
+      id: uuid,
+      type: "textResponse",
+      textResponse:
+        "There is no relevant information in this workspace to answer your query.",
+      sources: [],
+      close: true,
+      error: null,
+    });
+    return;
+  }
+
  // Compress message to ensure prompt passes token limit with room for response
  // and build system messages based on inputs and history.
  const messages = await LLMConnector.compressMessages(
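
Note (not part of the patch): both chatWithWorkspace and streamChatWithWorkspace now apply the same query-mode guard, once when the workspace has no embeddings at all and once when the similarity search returns no sources. A minimal sketch of that guard pulled out into a standalone helper for illustration, assuming the chunk shape shown in the hunks above; queryModeShortCircuit is a hypothetical name and does not exist in the codebase.

// Sketch only: mirrors the guard added in server/utils/chats/index.js and
// server/utils/chats/stream.js. Returns the canned "no relevant information"
// payload when query mode has nothing to ground an answer on, or null so the
// caller can continue with the normal chat / streaming flow.
function queryModeShortCircuit({ uuid, chatMode, sources = [] }) {
  if (chatMode !== "query" || sources.length > 0) return null;
  return {
    id: uuid,
    type: "textResponse",
    sources: [],
    close: true,
    error: null,
    textResponse:
      "There is no relevant information in this workspace to answer your query.",
  };
}

// Example usage inside a handler (hypothetical):
// const early = queryModeShortCircuit({ uuid, chatMode, sources });
// if (early) return early;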