Mirror of https://github.com/Mintplex-Labs/anything-llm.git (synced 2025-04-23 13:08:11 +00:00)
Patch handling of end chunk stream events for OpenAI endpoints (#1487)
* Patch handling of end chunk stream events for OpenAI endpoints
* update LiteLLM to use generic handler
* update for empty choices
Parent 13fb63930b
Commit 2f9b785f42
2 changed files with 10 additions and 43 deletions
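For background on the change: OpenAI-compatible streaming endpoints differ in how they signal the end of a stream, and some backends emit a chunk whose `choices` array is empty, so `chunk?.choices?.[0]` can be undefined. The sketch below uses illustrative chunk shapes (assumptions for demonstration, not taken from any specific provider) to show why the patch checks for the presence of the `finish_reason` key before comparing its value.

// Illustrative stream chunk shapes (assumptions for demonstration only):
const midStreamChunk = { choices: [{ delta: { content: "Hel" }, finish_reason: null }] };
const finalChunk = { choices: [{ delta: {}, finish_reason: "stop" }] };
const emptyChoicesChunk = { choices: [] }; // choices[0] is undefined here

for (const chunk of [midStreamChunk, finalChunk, emptyChoicesChunk]) {
  const message = chunk?.choices?.[0];
  // The old check compared message.finish_reason directly and would throw a
  // TypeError on the empty-choices chunk because `message` is undefined.
  // The new check requires the key to exist before comparing its value.
  const isEndChunk = Boolean(
    message?.hasOwnProperty("finish_reason") &&
      message.finish_reason !== "" &&
      message.finish_reason !== null
  );
  console.log(isEndChunk); // false, true, false (and no TypeError on the empty chunk)
}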
server/utils
@@ -1,7 +1,6 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
-  writeResponseChunk,
-  clientAbortedHandler,
+  handleDefaultStreamResponseV2,
 } = require("../../helpers/chat/responses");
 
 class LiteLLM {
@@ -113,45 +112,7 @@ class LiteLLM {
   }
 
   handleStream(response, stream, responseProps) {
-    const { uuid = uuidv4(), sources = [] } = responseProps;
-
-    return new Promise(async (resolve) => {
-      let fullText = "";
-
-      const handleAbort = () => clientAbortedHandler(resolve, fullText);
-      response.on("close", handleAbort);
-
-      for await (const chunk of stream) {
-        const message = chunk?.choices?.[0];
-        const token = message?.delta?.content;
-
-        if (token) {
-          fullText += token;
-          writeResponseChunk(response, {
-            uuid,
-            sources: [],
-            type: "textResponseChunk",
-            textResponse: token,
-            close: false,
-            error: false,
-          });
-        }
-
-        // LiteLLM does not give a finish reason in stream until the final chunk
-        if (message.finish_reason || message.finish_reason === "stop") {
-          writeResponseChunk(response, {
-            uuid,
-            sources,
-            type: "textResponseChunk",
-            textResponse: "",
-            close: true,
-            error: false,
-          });
-          response.removeListener("close", handleAbort);
-          resolve(fullText);
-        }
-      }
-    });
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
   }
 
 // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
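With the provider-specific loop gone, LiteLLM simply forwards its stream to the shared helper. A minimal sketch of that delegation pattern for any OpenAI-compatible provider follows (the class name SomeOpenAICompatibleLLM is illustrative, not from the repository):

const {
  handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");

class SomeOpenAICompatibleLLM {
  // ...constructor, streamGetChatCompletion(), etc. omitted...

  // Pipe the OpenAI-style stream to the HTTP response via the generic handler,
  // which accumulates tokens and resolves with the full generated text.
  handleStream(response, stream, responseProps) {
    return handleDefaultStreamResponseV2(response, stream, responseProps);
  }
}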
@@ -38,8 +38,13 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
       }
 
-      // LocalAi returns '' and others return null.
-      if (message.finish_reason !== "" && message.finish_reason !== null) {
+      // LocalAi returns '' and others return null on chunks - the last chunk is not "" or null.
+      // Either way, the key `finish_reason` must be present to determine ending chunk.
+      if (
+        message?.hasOwnProperty("finish_reason") && // Got valid message and it is an object with finish_reason
+        message.finish_reason !== "" &&
+        message.finish_reason !== null
+      ) {
         writeResponseChunk(response, {
           uuid,
           sources,
@@ -50,6 +55,7 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
         response.removeListener("close", handleAbort);
         resolve(fullText);
+        break; // Break streaming when a valid finish_reason is first encountered
       }
     }
   });
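Taken together, the end-chunk handling inside handleDefaultStreamResponseV2 now behaves roughly like the condensed sketch below (assembled from the removed LiteLLM loop and the new guard; error handling and other details of the real helper are omitted):

// uuidv4, clientAbortedHandler, and writeResponseChunk are defined elsewhere in the helpers module.
function handleDefaultStreamResponseV2(response, stream, responseProps) {
  const { uuid = uuidv4(), sources = [] } = responseProps;

  return new Promise(async (resolve) => {
    let fullText = "";
    const handleAbort = () => clientAbortedHandler(resolve, fullText);
    response.on("close", handleAbort);

    for await (const chunk of stream) {
      const message = chunk?.choices?.[0];
      const token = message?.delta?.content;

      if (token) {
        fullText += token;
        writeResponseChunk(response, {
          uuid,
          sources: [],
          type: "textResponseChunk",
          textResponse: token,
          close: false,
          error: false,
        });
      }

      // End the stream only when the chunk actually carries a finish_reason key
      // with a non-empty, non-null value; empty `choices` chunks are skipped safely.
      if (
        message?.hasOwnProperty("finish_reason") &&
        message.finish_reason !== "" &&
        message.finish_reason !== null
      ) {
        writeResponseChunk(response, {
          uuid,
          sources,
          type: "textResponseChunk",
          textResponse: "",
          close: true,
          error: false,
        });
        response.removeListener("close", handleAbort);
        resolve(fullText);
        break; // stop reading once a valid finish_reason is seen
      }
    }
  });
}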