diff --git a/server/utils/chats/embed.js b/server/utils/chats/embed.js
index 94df306f5..533ea0c34 100644
--- a/server/utils/chats/embed.js
+++ b/server/utils/chats/embed.js
@@ -77,11 +77,10 @@ async function streamChatWithForEmbed(
     chatMode
   );
 
-  // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
-  // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window.
+  // See stream.js comment for more information on this implementation.
   await new DocumentManager({
     workspace: embed.workspace,
-    maxTokens: LLMConnector.limits.system,
+    maxTokens: LLMConnector.promptWindowLimit(),
   })
     .pinnedDocs()
     .then((pinnedDocs) => {
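
A minimal sketch of what this budget change means in practice, assuming promptWindowLimit() returns the model's full context window in tokens and limits.system was a smaller sub-allocation of it (both readings inferred from this diff; the example value is illustrative):

    // Before: the pinned-document budget was the slice reserved for the system prompt.
    const oldBudget = LLMConnector.limits.system; // a fraction of the window (assumed)

    // After: pinned documents may draw on the entire prompt window; DocumentManager
    // is still expected to cap real usage (see the stream.js hunk below).
    const newBudget = LLMConnector.promptWindowLimit(); // e.g. 8192 tokens (illustrative)
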
diff --git a/server/utils/chats/index.js b/server/utils/chats/index.js
index 87d96c47c..38ce6c9bd 100644
--- a/server/utils/chats/index.js
+++ b/server/utils/chats/index.js
@@ -89,11 +89,10 @@ async function chatWithWorkspace(
     chatMode,
   });
 
-  // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
-  // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window.
+  // See stream.js comment for more information on this implementation.
   await new DocumentManager({
     workspace,
-    maxTokens: LLMConnector.limits.system,
+    maxTokens: LLMConnector.promptWindowLimit(),
   })
     .pinnedDocs()
     .then((pinnedDocs) => {
diff --git a/server/utils/chats/stream.js b/server/utils/chats/stream.js
index 0e471161a..57f326664 100644
--- a/server/utils/chats/stream.js
+++ b/server/utils/chats/stream.js
@@ -105,9 +105,13 @@ async function streamChatWithWorkspace(
 
   // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
   // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window.
+  // However, we cap the appended pinned context at 80% of the window's overall size, mostly because anything beyond
+  // that would undergo prompt compression anyway to make it fit. If so much is pinned that the context exceeds what
+  // the model can support, it would get compressed regardless - and that defeats the point of pinning. Pinning is
+  // best suited for high-context models.
   await new DocumentManager({
     workspace,
-    maxTokens: LLMConnector.limits.system,
+    maxTokens: LLMConnector.promptWindowLimit(),
   })
     .pinnedDocs()
     .then((pinnedDocs) => {
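
To make the comment above concrete, here is a rough sketch of how a pinned-document gatherer could enforce that budget. DocumentManager, pinnedDocs(), and the maxTokens option are names taken from this diff; the placement of the 0.8 cap and the helper functions are assumptions for illustration, not the real implementation:

    // Hypothetical sketch, not the actual DocumentManager implementation.
    // Stand-in helpers; the real code would query storage and a real tokenizer.
    const fetchPinnedDocs = async (workspace) => workspace.pinnedDocs ?? [];
    const countTokens = (text) => Math.ceil(text.length / 4); // rough chars-per-token heuristic

    class DocumentManager {
      constructor({ workspace, maxTokens }) {
        this.workspace = workspace;
        // Cap pinned context at 80% of the model's prompt window so the
        // assembled prompt does not immediately trigger prompt compression.
        this.tokenBudget = Math.floor(maxTokens * 0.8);
      }

      async pinnedDocs() {
        const docs = await fetchPinnedDocs(this.workspace);
        const selected = [];
        let used = 0;
        for (const doc of docs) {
          const cost = countTokens(doc.pageContent); // pageContent shape is assumed
          if (used + cost > this.tokenBudget) break; // stop before overflowing the window
          selected.push(doc);
          used += cost;
        }
        return selected;
      }
    }

Under a scheme like this, passing promptWindowLimit() lets heavily pinned workspaces on high-context models keep their documents intact, while smaller models simply include fewer pinned documents instead of compressing everything.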