Support historical message image inputs/attachments for n+1 queries ()

* Support historical message image inputs/attachments for n+1 queries

* Patch Gemini history handling

* OpenRouter vision support cleanup

* xAI vision history support

* Mistral logging

---------

Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
Timothy Carambat 2025-01-16 13:49:06 -08:00 committed by GitHub
parent a2264f46ff
commit c4f75feb08
21 changed files with 125 additions and 31 deletions
server/utils
  AiProviders
    anthropic
    apipie
    azureOpenAi
    bedrock
    gemini
    genericOpenAi
    groq
    koboldCPP
    liteLLM
    lmStudio
    localAi
    mistral
    novita
    nvidiaNim
    ollama
    openAi
    openRouter
    textGenWebUI
    xai
  chats
  helpers/chat
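
Every provider change below follows the same pattern: the raw ...chatHistory spread in the prompt-construction step is replaced with the new formatChatHistory helper, so historical user messages that carried image attachments are re-expanded into the provider's multimodal content format for follow-up (n+1) queries instead of being flattened to plain text. Bedrock and Ollama pass a third "spread" argument because their message objects embed the generated content directly rather than under a content key. A minimal sketch of the resulting shape; buildMessages and the llm argument are illustrative stand-ins for each provider's own prompt-construction method and its private #generateContent:

// Sketch only (not code from the repository): what a provider's message array
// construction looks like after this commit.
const { formatChatHistory } = require("./server/utils/helpers/chat/responses");

function buildMessages({ systemPrompt, chatHistory, userPrompt, attachments }, llm) {
  const prompt = { role: "system", content: systemPrompt };
  return [
    prompt,
    // Previously: ...chatHistory; attachments on older turns were silently dropped.
    ...formatChatHistory(chatHistory, llm.generateContent),
    { role: "user", content: llm.generateContent({ userPrompt, attachments }) },
  ];
}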

server/utils/AiProviders/anthropic

@@ -2,6 +2,7 @@ const { v4 } = require("uuid");
 const {
   writeResponseChunk,
   clientAbortedHandler,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const { MODEL_MAP } = require("../modelMap");
@@ -99,7 +100,7 @@ class AnthropicLLM {
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/apipie

@@ -3,6 +3,7 @@ const { v4: uuidv4 } = require("uuid");
 const {
   writeResponseChunk,
   clientAbortedHandler,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const fs = require("fs");
 const path = require("path");
@@ -177,7 +178,7 @@ class ApiPieLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/azureOpenAi

@@ -5,6 +5,7 @@ const {
 const {
   writeResponseChunk,
   clientAbortedHandler,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 class AzureOpenAiLLM {
@@ -103,7 +104,7 @@ class AzureOpenAiLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/bedrock

@@ -2,6 +2,7 @@ const { StringOutputParser } = require("@langchain/core/output_parsers");
 const {
   writeResponseChunk,
   clientAbortedHandler,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
@@ -199,7 +200,7 @@ class AWSBedrockLLM {
     // AWS Mistral models do not support system prompts
     if (this.model.startsWith("mistral"))
       return [
-        ...chatHistory,
+        ...formatChatHistory(chatHistory, this.#generateContent, "spread"),
         {
           role: "user",
           ...this.#generateContent({ userPrompt, attachments }),
@@ -212,7 +213,7 @@ class AWSBedrockLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent, "spread"),
       {
         role: "user",
         ...this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/gemini

@@ -7,6 +7,7 @@ const {
 const {
   writeResponseChunk,
   clientAbortedHandler,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const { MODEL_MAP } = require("../modelMap");
 const { defaultGeminiModels, v1BetaModels } = require("./defaultModels");
@@ -254,6 +255,7 @@ class GeminiLLM {
     const models = await this.fetchModels(process.env.GEMINI_API_KEY);
     return models.some((model) => model.id === modelName);
   }
   /**
    * Generates appropriate content array for a message + attachments.
    * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
@@ -290,7 +292,7 @@ class GeminiLLM {
     return [
       prompt,
       { role: "assistant", content: "Okay." },
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "USER_PROMPT",
         content: this.#generateContent({ userPrompt, attachments }),
@@ -306,8 +308,17 @@ class GeminiLLM {
       .map((message) => {
         if (message.role === "system")
           return { role: "user", parts: [{ text: message.content }] };
-        if (message.role === "user")
+        if (message.role === "user") {
+          // If the content is an array - then we have already formatted the context so return it directly.
+          if (Array.isArray(message.content))
+            return { role: "user", parts: message.content };
+          // Otherwise, this was a regular user message with no attachments
+          // so we need to format it for Gemini
           return { role: "user", parts: [{ text: message.content }] };
+        }
         if (message.role === "assistant")
           return { role: "model", parts: [{ text: message.content }] };
         return null;
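
The Array.isArray branch above is what lets re-injected history attachments survive: formatChatHistory has already turned such a message's content into a Gemini parts array, so it is passed through as-is, while plain string messages still get wrapped in a single text part. A minimal stand-alone mirror of that branch (illustrative, not code from the repository):

// Mirrors the user-role branch of the Gemini message mapper above.
function toGeminiUserMessage(message) {
  // Already-formatted content (an array of parts) passes through untouched.
  if (Array.isArray(message.content))
    return { role: "user", parts: message.content };
  // Plain string content gets wrapped in a single text part.
  return { role: "user", parts: [{ text: message.content }] };
}

// toGeminiUserMessage({ role: "user", content: "hello" })
//   -> { role: "user", parts: [{ text: "hello" }] }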

server/utils/AiProviders/genericOpenAi

@@ -4,6 +4,7 @@ const {
 } = require("../../helpers/chat/LLMPerformanceMonitor");
 const {
   handleDefaultStreamResponseV2,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const { toValidNumber } = require("../../http");
@@ -133,7 +134,7 @@ class GenericOpenAiLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/groq

@@ -89,6 +89,8 @@ class GroqLLM {
    * Since we can only explicitly support the current models, this is a temporary solution.
    * If the attachments are empty or the model is not a vision model, we will return the default prompt structure which will work for all models.
    * If the attachments are present and the model is a vision model - we only return the user prompt with attachments - see comment at end of function for more.
+   *
+   * Historical attachments are also omitted from prompt chat history for the reasons above. (TDC: Dec 30, 2024)
    */
   #conditionalPromptStruct({
     systemPrompt = "",
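
A rough sketch of the decision that comment describes. The vision check and the attachment and content shapes are assumptions for illustration, not the Groq provider's actual implementation:

// Illustrative only; isVisionModel and the attachment field names are assumed.
function conditionalPromptSketch({ model, systemPrompt, chatHistory, userPrompt, attachments = [] }) {
  const isVisionModel = (m) => m.includes("vision"); // naive stand-in for the real model check
  if (!attachments.length || !isVisionModel(model)) {
    // Default structure: works for every model, history stays text-only.
    return [
      { role: "system", content: systemPrompt },
      ...chatHistory,
      { role: "user", content: userPrompt },
    ];
  }
  // Vision path: only the current prompt carries attachments; historical attachments are omitted.
  return [
    {
      role: "user",
      content: [
        { type: "text", text: userPrompt },
        ...attachments.map((a) => ({ type: "image_url", image_url: { url: a.contentString } })),
      ],
    },
  ];
}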

server/utils/AiProviders/koboldCPP

@@ -2,6 +2,7 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
   clientAbortedHandler,
   writeResponseChunk,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const {
   LLMPerformanceMonitor,
@@ -116,7 +117,7 @@ class KoboldCPPLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/liteLLM

@@ -4,6 +4,7 @@ const {
 } = require("../../helpers/chat/LLMPerformanceMonitor");
 const {
   handleDefaultStreamResponseV2,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 class LiteLLM {
@@ -115,7 +116,7 @@ class LiteLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/lmStudio

@@ -1,6 +1,7 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
   handleDefaultStreamResponseV2,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const {
   LLMPerformanceMonitor,
@@ -117,7 +118,7 @@ class LMStudioLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/localAi

@@ -4,6 +4,7 @@ const {
 } = require("../../helpers/chat/LLMPerformanceMonitor");
 const {
   handleDefaultStreamResponseV2,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 class LocalAiLLM {
@@ -103,7 +104,7 @@ class LocalAiLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/mistral

@@ -4,6 +4,7 @@ const {
 } = require("../../helpers/chat/LLMPerformanceMonitor");
 const {
   handleDefaultStreamResponseV2,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 class MistralLLM {
@@ -26,6 +27,11 @@ class MistralLLM {
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.0;
+    this.log("Initialized with model:", this.model);
   }
+  log(text, ...args) {
+    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
+  }
   #appendContext(contextTexts = []) {
@@ -92,7 +98,7 @@ class MistralLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/novita

@@ -3,6 +3,7 @@ const { v4: uuidv4 } = require("uuid");
 const {
   writeResponseChunk,
   clientAbortedHandler,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const fs = require("fs");
 const path = require("path");
@@ -177,7 +178,7 @@ class NovitaLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/nvidiaNim

@@ -4,6 +4,7 @@ const {
 } = require("../../helpers/chat/LLMPerformanceMonitor");
 const {
   handleDefaultStreamResponseV2,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 class NvidiaNimLLM {
@@ -142,7 +143,7 @@ class NvidiaNimLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/ollama

@@ -1,6 +1,7 @@
 const {
   writeResponseChunk,
   clientAbortedHandler,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
@@ -120,7 +121,7 @@ class OllamaAILLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent, "spread"),
       {
         role: "user",
         ...this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/openAi

@@ -1,6 +1,7 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
   handleDefaultStreamResponseV2,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const { MODEL_MAP } = require("../modelMap");
 const {
@@ -121,7 +122,7 @@ class OpenAiLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/openRouter

@@ -3,6 +3,7 @@ const { v4: uuidv4 } = require("uuid");
 const {
   writeResponseChunk,
   clientAbortedHandler,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const fs = require("fs");
 const path = require("path");
@@ -47,6 +48,7 @@ class OpenRouterLLM {
     fs.mkdirSync(cacheFolder, { recursive: true });
     this.cacheModelPath = path.resolve(cacheFolder, "models.json");
     this.cacheAtPath = path.resolve(cacheFolder, ".cached_at");
+    this.log("Initialized with model:", this.model);
   }
   log(text, ...args) {
@@ -162,7 +164,6 @@ class OpenRouterLLM {
         },
       });
     }
-    console.log(content.flat());
     return content.flat();
   }
@@ -179,7 +180,7 @@ class OpenRouterLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/textGenWebUI

@@ -1,6 +1,7 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
   handleDefaultStreamResponseV2,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const {
   LLMPerformanceMonitor,
@@ -113,7 +114,7 @@ class TextGenWebUILLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/AiProviders/xai

@@ -4,6 +4,7 @@ const {
 } = require("../../helpers/chat/LLMPerformanceMonitor");
 const {
   handleDefaultStreamResponseV2,
+  formatChatHistory,
 } = require("../../helpers/chat/responses");
 const { MODEL_MAP } = require("../modelMap");
@@ -27,6 +28,11 @@ class XAiLLM {
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;
+    this.log("Initialized with model:", this.model);
   }
+  log(text, ...args) {
+    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
+  }
   #appendContext(contextTexts = []) {
@@ -53,13 +59,8 @@ class XAiLLM {
     return MODEL_MAP.xai[this.model] ?? 131_072;
   }
-  isValidChatCompletionModel(modelName = "") {
-    switch (modelName) {
-      case "grok-beta":
-        return true;
-      default:
-        return false;
-    }
+  isValidChatCompletionModel(_modelName = "") {
+    return true;
   }
   /**
@@ -103,7 +104,7 @@ class XAiLLM {
     };
     return [
       prompt,
-      ...chatHistory,
+      ...formatChatHistory(chatHistory, this.#generateContent),
       {
         role: "user",
         content: this.#generateContent({ userPrompt, attachments }),

server/utils/chats

@@ -210,7 +210,7 @@ async function streamChatWithForEmbed(
  * @param {string} sessionId the session id of the user from embed widget
  * @param {Object} embed the embed config object
  * @param {Number} messageLimit the number of messages to return
- * @returns {Promise<{rawHistory: import("@prisma/client").embed_chats[], chatHistory: {role: string, content: string}[]}>
+ * @returns {Promise<{rawHistory: import("@prisma/client").embed_chats[], chatHistory: {role: string, content: string, attachments?: Object[]}[]}>
  */
 async function recentEmbedChatHistory(sessionId, embed, messageLimit = 20) {
   const rawHistory = (

server/utils/helpers/chat

@@ -164,6 +164,11 @@ function convertToChatHistory(history = []) {
   return formattedHistory.flat();
 }
+/**
+ * Converts a chat history to a prompt history.
+ * @param {Object[]} history - The chat history to convert
+ * @returns {{role: string, content: string, attachments?: import("..").Attachment}[]}
+ */
 function convertToPromptHistory(history = []) {
   const formattedHistory = [];
   for (const record of history) {
@@ -185,8 +190,18 @@ function convertToPromptHistory(history = []) {
     }
     formattedHistory.push([
-      { role: "user", content: prompt },
-      { role: "assistant", content: data.text },
+      {
+        role: "user",
+        content: prompt,
+        // if there are attachments, add them as a property to the user message so we can reuse them in chat history later if supported by the llm.
+        ...(data?.attachments?.length > 0
+          ? { attachments: data?.attachments }
+          : {}),
+      },
+      {
+        role: "assistant",
+        content: data.text,
+      },
     ]);
   }
   return formattedHistory.flat();
@@ -197,10 +212,54 @@ function writeResponseChunk(response, data) {
   return;
 }
+/**
+ * Formats the chat history to re-use attachments in the chat history
+ * that might have existed in the conversation earlier.
+ * @param {{role:string, content:string, attachments?: Object[]}[]} chatHistory
+ * @param {function} formatterFunction - The function to format the chat history from the llm provider
+ * @param {('asProperty'|'spread')} mode - "asProperty" or "spread". Determines how the content is formatted in the message object.
+ * @returns {object[]}
+ */
+function formatChatHistory(
+  chatHistory = [],
+  formatterFunction,
+  mode = "asProperty"
+) {
+  return chatHistory.map((historicalMessage) => {
+    if (
+      historicalMessage?.role !== "user" || // Only user messages can have attachments
+      !historicalMessage?.attachments || // If there are no attachments, we can skip this
+      !historicalMessage.attachments.length // If there is an array but it is empty, we can skip this
+    )
+      return historicalMessage;
+    // Some providers, like Ollama, expect the content to be embedded in the message object.
+    if (mode === "spread") {
+      return {
+        role: historicalMessage.role,
+        ...formatterFunction({
+          userPrompt: historicalMessage.content,
+          attachments: historicalMessage.attachments,
+        }),
+      };
+    }
+    // Most providers expect the content to be a property of the message object formatted like OpenAI models.
+    return {
+      role: historicalMessage.role,
+      content: formatterFunction({
+        userPrompt: historicalMessage.content,
+        attachments: historicalMessage.attachments,
+      }),
+    };
+  });
+}
 module.exports = {
   handleDefaultStreamResponseV2,
   convertToChatHistory,
   convertToPromptHistory,
   writeResponseChunk,
   clientAbortedHandler,
+  formatChatHistory,
 };
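
For reference, a hedged usage sketch of the new helper. The formatter callbacks and the attachment fields (name, contentString) are illustrative stand-ins for each provider's private #generateContent and the real Attachment shape, neither of which is spelled out in this diff:

// Sketch only; run from the repository root under the assumed file layout.
const { formatChatHistory } = require("./server/utils/helpers/chat/responses");

const history = [
  {
    role: "user",
    content: "What is in this image?",
    attachments: [{ name: "cat.png", contentString: "data:image/png;base64,..." }], // assumed fields
  },
  { role: "assistant", content: "A cat sitting on a couch." },
  { role: "user", content: "Thanks!" }, // no attachments -> returned unchanged
];

// Default "asProperty" mode: the formatter's return value becomes message.content
// (for example an OpenAI-style multimodal content array).
const openAiStyle = formatChatHistory(history, ({ userPrompt, attachments }) => [
  { type: "text", text: userPrompt },
  ...attachments.map((a) => ({ type: "image_url", image_url: { url: a.contentString } })),
]);

// "spread" mode (used by Ollama and Bedrock): the formatter's return value is spread
// directly into the message object instead of being assigned to content.
const ollamaStyle = formatChatHistory(
  history,
  ({ userPrompt, attachments }) => ({
    content: userPrompt,
    images: attachments.map((a) => a.contentString),
  }),
  "spread"
);

console.log(openAiStyle[0].content.length); // 2: one text part plus one image part
console.log(ollamaStyle[0].images.length); // 1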