Patch handling of end chunk stream events for OpenAI endpoints ()

* Patch handling of end chunk stream events for OpenAI endpoints

* update LiteLLM to use generic handler

* update for empty choices
Timothy Carambat 2024-05-23 12:20:40 -05:00 committed by GitHub
parent 13fb63930b
commit 2f9b785f42
2 changed files with 10 additions and 43 deletions

server/utils/AiProviders/liteLLM/index.js

@@ -1,7 +1,6 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
-  writeResponseChunk,
-  clientAbortedHandler,
+  handleDefaultStreamResponseV2,
 } = require("../../helpers/chat/responses");
 
 class LiteLLM {
@@ -113,45 +112,7 @@ class LiteLLM {
   }
 
   handleStream(response, stream, responseProps) {
-    const { uuid = uuidv4(), sources = [] } = responseProps;
-
-    return new Promise(async (resolve) => {
-      let fullText = "";
-      const handleAbort = () => clientAbortedHandler(resolve, fullText);
-      response.on("close", handleAbort);
-
-      for await (const chunk of stream) {
-        const message = chunk?.choices?.[0];
-        const token = message?.delta?.content;
-
-        if (token) {
-          fullText += token;
-          writeResponseChunk(response, {
-            uuid,
-            sources: [],
-            type: "textResponseChunk",
-            textResponse: token,
-            close: false,
-            error: false,
-          });
-        }
-
-        // LiteLLM does not give a finish reason in stream until the final chunk
-        if (message.finish_reason || message.finish_reason === "stop") {
-          writeResponseChunk(response, {
-            uuid,
-            sources,
-            type: "textResponseChunk",
-            textResponse: "",
-            close: true,
-            error: false,
-          });
-          response.removeListener("close", handleAbort);
-          resolve(fullText);
-        }
-      }
-    });
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
   }
 
   // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
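
With the LiteLLM-specific loop removed, handleStream reduces to a one-line delegation. A minimal sketch of the same pattern for a hypothetical OpenAI-compatible provider follows; the class name and omitted members are placeholders, only the delegation line itself comes from the diff above:

// Hypothetical provider sketch - everything except the delegation is assumed.
const {
  handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");

class SomeOpenAICompatibleLLM {
  // constructor, completion methods, etc. omitted
  handleStream(response, stream, responseProps) {
    // Chunk parsing, client-abort handling, and finish_reason detection all
    // live in the shared helper, so streaming fixes land in one place.
    return handleDefaultStreamResponseV2(response, stream, responseProps);
  }
}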

server/utils/helpers/chat/responses.js

@@ -38,8 +38,13 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
       }
 
-      // LocalAi returns '' and others return null.
-      if (message.finish_reason !== "" && message.finish_reason !== null) {
+      // LocalAi returns '' and others return null on chunks - the last chunk is not "" or null.
+      // Either way, the key `finish_reason` must be present to determine ending chunk.
+      if (
+        message?.hasOwnProperty("finish_reason") && // Got valid message and it is an object with finish_reason
+        message.finish_reason !== "" &&
+        message.finish_reason !== null
+      ) {
         writeResponseChunk(response, {
           uuid,
           sources,
@@ -50,6 +55,7 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
         response.removeListener("close", handleAbort);
         resolve(fullText);
+        break; // Break streaming when a valid finish_reason is first encountered
       }
     }
   });
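
For reference, a rough sketch of the chunk shapes an OpenAI-compatible stream can produce and how the patched check classifies them. The values are illustrative only, not taken from this diff; real payloads vary by backend (OpenAI, LocalAI, LiteLLM proxies, etc.):

// Illustrative chunk shapes only.
const contentChunk = {
  choices: [{ delta: { content: "Hello" }, finish_reason: null }], // "" on LocalAI
};
const emptyChoicesChunk = { choices: [] }; // e.g. a trailing usage-only chunk
const finalChunk = {
  choices: [{ delta: {}, finish_reason: "stop" }],
};

// Same guard as the patched handler above:
function isEndChunk(chunk) {
  const message = chunk?.choices?.[0];
  return Boolean(
    message?.hasOwnProperty("finish_reason") &&
      message.finish_reason !== "" &&
      message.finish_reason !== null
  );
}

console.log(isEndChunk(contentChunk)); // false - keep streaming tokens
console.log(isEndChunk(emptyChoicesChunk)); // false - no message, nothing to read
console.log(isEndChunk(finalChunk)); // true - close the response and break

The hasOwnProperty guard keeps a chunk with empty choices from being treated as an end event (or from blowing up when message is undefined), which is presumably what the "update for empty choices" note in the commit message refers to.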