const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
  handleDefaultStreamResponseV2,
  formatChatHistory,
} = require("../../helpers/chat/responses");
const { MODEL_MAP } = require("../modelMap");
const {
  LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");
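
/**
 * LLM provider wrapper around the OpenAI chat completions API.
 * Handles prompt construction and (streaming) chat completions, and delegates
 * text embedding to the configured embedder.
 */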
class OpenAiLLM {
  constructor(embedder = null, modelPreference = null) {
    if (!process.env.OPEN_AI_KEY) throw new Error("No OpenAI API key was set.");
    const { OpenAI: OpenAIApi } = require("openai");

    this.openai = new OpenAIApi({
      apiKey: process.env.OPEN_AI_KEY,
    });
    this.model = modelPreference || process.env.OPEN_MODEL_PREF || "gpt-4o";
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
  }

  /**
   * Check if the model is an o-type (reasoning) model, e.g. o1 or o3-mini.
   * @returns {boolean}
   */
  get isOTypeModel() {
    return this.model.startsWith("o");
  }
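
  /**
   * Format retrieved context snippets into a tagged block that is appended
   * to the system prompt.
   * @param {string[]} contextTexts
   * @returns {string}
   */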
  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }
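
  /**
   * Whether streaming responses are supported for the current model.
   * @returns {boolean}
   */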
  streamingEnabled() {
    // o3-mini is the only o-type model that supports streaming
    if (this.isOTypeModel && this.model !== "o3-mini") return false;
    return "streamGetChatCompletion" in this;
  }
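
  /**
   * Look up the context window (in tokens) for a given OpenAI model name.
   * Falls back to 4,096 tokens when the model is not in MODEL_MAP.
   * @param {string} modelName
   * @returns {number}
   */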
  static promptWindowLimit(modelName) {
    return MODEL_MAP.openai[modelName] ?? 4_096;
  }
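
  /**
   * Context window (in tokens) for the currently selected model.
   * @returns {number}
   */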
  promptWindowLimit() {
    return MODEL_MAP.openai[this.model] ?? 4_096;
  }

  // Short-circuit when the model name contains "gpt" or starts with "o", since we now fetch
  // models from the OpenAI API via the user's API key, so the model should be relevant and real.
  // If somehow it is not, the chat will fail, but that error is caught.
  // We do not want to hit the OpenAI API on every chat because it would get spammed
  // and introduce latency for no reason.
  async isValidChatCompletionModel(modelName = "") {
    const isPreset =
      modelName.toLowerCase().includes("gpt") ||
      modelName.toLowerCase().startsWith("o");
    if (isPreset) return true;

    const model = await this.openai.models
      .retrieve(modelName)
      .then((modelObj) => modelObj)
      .catch(() => null);
    return !!model;
  }

  /**
   * Generates the appropriate content payload for a message + attachments:
   * a plain string when there are no attachments, otherwise an OpenAI
   * multimodal content array.
   * @param {{userPrompt: string, attachments: import("../../helpers").Attachment[]}} param0
   * @returns {string|object[]}
   */
  #generateContent({ userPrompt, attachments = [] }) {
    if (!attachments.length) {
      return userPrompt;
    }

    const content = [{ type: "text", text: userPrompt }];
    for (let attachment of attachments) {
      content.push({
        type: "image_url",
        image_url: {
          url: attachment.contentString,
          detail: "high",
        },
      });
    }
    return content.flat();
  }

  /**
   * Construct the full chat message array (system prompt, chat history, and
   * the current user prompt) for this model.
   * @param {{systemPrompt: string, contextTexts: string[], chatHistory: object[], userPrompt: string, attachments: import("../../helpers").Attachment[]}} param0
   * @returns {object[]}
   */
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
    attachments = [], // This is the specific attachment for only this prompt
  }) {
    // o1 models do not support the "system" role, so for o-type models we use
    // the "user" role as a replacement for now.
    // https://community.openai.com/t/o1-models-do-not-support-system-role-in-chat-completion/953880
    const prompt = {
      role: this.isOTypeModel ? "user" : "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };
    return [
      prompt,
      ...formatChatHistory(chatHistory, this.#generateContent),
      {
        role: "user",
        content: this.#generateContent({ userPrompt, attachments }),
      },
    ];
  }
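
  /**
   * Run a non-streaming chat completion and collect timing and usage metrics.
   * @param {object[]|null} messages - OpenAI-formatted chat messages.
   * @param {{temperature: number}} options
   * @returns {Promise<{textResponse: string, metrics: object}|null>}
   */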
  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `OpenAI chat: ${this.model} is not valid for chat completion!`
      );

    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      this.openai.chat.completions
        .create({
          model: this.model,
          messages,
          temperature: this.isOTypeModel ? 1 : temperature, // o-type (reasoning) models only accept the default temperature of 1
        })
        .catch((e) => {
          throw new Error(e.message);
        })
    );

    if (
      !result.output.hasOwnProperty("choices") ||
      result.output.choices.length === 0
    )
      return null;

    return {
      textResponse: result.output.choices[0].message.content,
      metrics: {
        prompt_tokens: result.output.usage.prompt_tokens || 0,
        completion_tokens: result.output.usage.completion_tokens || 0,
        total_tokens: result.output.usage.total_tokens || 0,
        outputTps: result.output.usage.completion_tokens / result.duration,
        duration: result.duration,
      },
    };
  }
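
  /**
   * Run a streaming chat completion wrapped in the performance monitor.
   * @param {object[]|null} messages - OpenAI-formatted chat messages.
   * @param {{temperature: number}} options
   * @returns {Promise<object>} The measured stream returned by LLMPerformanceMonitor.measureStream.
   */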
  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `OpenAI chat: ${this.model} is not valid for chat completion!`
      );

    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
      this.openai.chat.completions.create({
        model: this.model,
        stream: true,
        messages,
        temperature: this.isOTypeModel ? 1 : temperature, // o-type (reasoning) models only accept the default temperature of 1
      }),
      messages
      // runPromptTokenCalculation: true - we manually count prompt tokens because OpenAI does
      // not provide them in the stream, since we are not using the OpenAI API version that
      // supports the `stream_options` param.
    );

    return measuredStreamRequest;
  }
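
  /**
   * Forward a streamed completion to the client via the shared default stream handler.
   */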
  handleStream(response, stream, responseProps) {
    return handleDefaultStreamResponseV2(response, stream, responseProps);
  }

  // Simple wrappers around the configured embedder to normalize the embedding
  // interface across all LLM implementations.
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }
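
  /**
   * Build the prompt via constructPrompt and compress it together with the raw
   * history so it fits within this provider's token limits.
   * @param {object} promptArgs - Arguments forwarded to constructPrompt.
   * @param {object[]} rawHistory
   */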
  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}

module.exports = {
  OpenAiLLM,
};
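
// Example usage (illustrative sketch only; assumes OPEN_AI_KEY is set in the
// environment and that this runs inside an async function - the prompt values
// below are hypothetical):
//
//   const { OpenAiLLM } = require("./index");
//   const llm = new OpenAiLLM(null, "gpt-4o");
//   const messages = llm.constructPrompt({
//     systemPrompt: "You are a helpful assistant.",
//     contextTexts: ["Some retrieved context."],
//     chatHistory: [],
//     userPrompt: "Summarize the context above.",
//   });
//   const result = await llm.getChatCompletion(messages, {
//     temperature: llm.defaultTemp,
//   });
//   if (result) console.log(result.textResponse, result.metrics);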