Mirror of https://github.com/Mintplex-Labs/anything-llm.git (synced 2025-04-23 13:08:11 +00:00)
Patch handling of end chunk stream events for OpenAI endpoints (#1487)
* Patch handling of end chunk stream events for OpenAI endpoints
* update LiteLLM to use generic handler
* update for empty choices
Parent 13fb63930b
Commit 2f9b785f42
2 changed files with 10 additions and 43 deletions
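For background on the change: OpenAI-compatible streaming endpoints differ in how they signal the end of a stream, and some backends emit a chunk whose `choices` array is empty, so `chunk?.choices?.[0]` can be undefined. The sketch below uses illustrative chunk shapes (assumptions for demonstration, not taken from any specific provider) to show why the patch checks for the presence of the `finish_reason` key before comparing its value.

// Illustrative stream chunk shapes (assumptions for demonstration only):
const midStreamChunk = { choices: [{ delta: { content: "Hel" }, finish_reason: null }] };
const finalChunk = { choices: [{ delta: {}, finish_reason: "stop" }] };
const emptyChoicesChunk = { choices: [] }; // choices[0] is undefined here

for (const chunk of [midStreamChunk, finalChunk, emptyChoicesChunk]) {
  const message = chunk?.choices?.[0];
  // The old check compared message.finish_reason directly and would throw a
  // TypeError on the empty-choices chunk because `message` is undefined.
  // The new check requires the key to exist before comparing its value.
  const isEndChunk = Boolean(
    message?.hasOwnProperty("finish_reason") &&
      message.finish_reason !== "" &&
      message.finish_reason !== null
  );
  console.log(isEndChunk); // false, true, false (and no TypeError on the empty chunk)
}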
server/utils
@@ -1,7 +1,6 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
-  writeResponseChunk,
-  clientAbortedHandler,
+  handleDefaultStreamResponseV2,
 } = require("../../helpers/chat/responses");
 
 class LiteLLM {
@@ -113,45 +112,7 @@ class LiteLLM {
   }
 
   handleStream(response, stream, responseProps) {
-    const { uuid = uuidv4(), sources = [] } = responseProps;
-
-    return new Promise(async (resolve) => {
-      let fullText = "";
-
-      const handleAbort = () => clientAbortedHandler(resolve, fullText);
-      response.on("close", handleAbort);
-
-      for await (const chunk of stream) {
-        const message = chunk?.choices?.[0];
-        const token = message?.delta?.content;
-
-        if (token) {
-          fullText += token;
-          writeResponseChunk(response, {
-            uuid,
-            sources: [],
-            type: "textResponseChunk",
-            textResponse: token,
-            close: false,
-            error: false,
-          });
-        }
-
-        // LiteLLM does not give a finish reason in stream until the final chunk
-        if (message.finish_reason || message.finish_reason === "stop") {
-          writeResponseChunk(response, {
-            uuid,
-            sources,
-            type: "textResponseChunk",
-            textResponse: "",
-            close: true,
-            error: false,
-          });
-          response.removeListener("close", handleAbort);
-          resolve(fullText);
-        }
-      }
-    });
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
   }
 
 // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
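With the provider-specific loop gone, LiteLLM simply forwards its stream to the shared helper. A minimal sketch of that delegation pattern for any OpenAI-compatible provider follows (the class name SomeOpenAICompatibleLLM is illustrative, not from the repository):

const {
  handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");

class SomeOpenAICompatibleLLM {
  // ...constructor, streamGetChatCompletion(), etc. omitted...

  // Pipe the OpenAI-style stream to the HTTP response via the generic handler,
  // which accumulates tokens and resolves with the full generated text.
  handleStream(response, stream, responseProps) {
    return handleDefaultStreamResponseV2(response, stream, responseProps);
  }
}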
@@ -38,8 +38,13 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
       }
 
-      // LocalAi returns '' and others return null.
-      if (message.finish_reason !== "" && message.finish_reason !== null) {
+      // LocalAi returns '' and others return null on chunks - the last chunk is not "" or null.
+      // Either way, the key `finish_reason` must be present to determine ending chunk.
+      if (
+        message?.hasOwnProperty("finish_reason") && // Got valid message and it is an object with finish_reason
+        message.finish_reason !== "" &&
+        message.finish_reason !== null
+      ) {
         writeResponseChunk(response, {
           uuid,
           sources,
@@ -50,6 +55,7 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
         response.removeListener("close", handleAbort);
         resolve(fullText);
+        break; // Break streaming when a valid finish_reason is first encountered
       }
     }
   });
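Taken together, the end-chunk handling inside handleDefaultStreamResponseV2 now behaves roughly like the condensed sketch below (assembled from the removed LiteLLM loop and the new guard; error handling and other details of the real helper are omitted):

// uuidv4, clientAbortedHandler, and writeResponseChunk are defined elsewhere in the helpers module.
function handleDefaultStreamResponseV2(response, stream, responseProps) {
  const { uuid = uuidv4(), sources = [] } = responseProps;

  return new Promise(async (resolve) => {
    let fullText = "";
    const handleAbort = () => clientAbortedHandler(resolve, fullText);
    response.on("close", handleAbort);

    for await (const chunk of stream) {
      const message = chunk?.choices?.[0];
      const token = message?.delta?.content;

      if (token) {
        fullText += token;
        writeResponseChunk(response, {
          uuid,
          sources: [],
          type: "textResponseChunk",
          textResponse: token,
          close: false,
          error: false,
        });
      }

      // End the stream only when the chunk actually carries a finish_reason key
      // with a non-empty, non-null value; empty `choices` chunks are skipped safely.
      if (
        message?.hasOwnProperty("finish_reason") &&
        message.finish_reason !== "" &&
        message.finish_reason !== null
      ) {
        writeResponseChunk(response, {
          uuid,
          sources,
          type: "textResponseChunk",
          textResponse: "",
          close: true,
          error: false,
        });
        response.removeListener("close", handleAbort);
        resolve(fullText);
        break; // stop reading once a valid finish_reason is seen
      }
    }
  });
}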