From 2f9b785f424ba538bb37fc8de3842a74b650d8e1 Mon Sep 17 00:00:00 2001
From: Timothy Carambat <rambat1010@gmail.com>
Date: Thu, 23 May 2024 12:20:40 -0500
Subject: [PATCH] Patch handling of end chunk stream events for OpenAI
 endpoints (#1487)

* Patch handling of end chunk stream events for OpenAI endpoints

* update LiteLLM to use the generic stream handler

* update to handle chunks with an empty choices array
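
For reference, a minimal sketch of the per-chunk check the generic handler now applies; the helper name `isFinishChunk` and the sample inputs below are illustrative only and are not part of this patch:

    // Intermediate chunks carry finish_reason: "" (LocalAI) or null (most
    // OpenAI-compatible endpoints); only the final chunk carries a real value
    // such as "stop". A chunk with an empty choices array yields message === undefined.
    function isFinishChunk(message) {
      return Boolean(
        message?.hasOwnProperty("finish_reason") &&
          message.finish_reason !== "" &&
          message.finish_reason !== null
      );
    }

    // isFinishChunk(undefined)                 -> false (empty choices array)
    // isFinishChunk({ finish_reason: null })   -> false
    // isFinishChunk({ finish_reason: "" })     -> false
    // isFinishChunk({ finish_reason: "stop" }) -> true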
---
 server/utils/AiProviders/liteLLM/index.js | 43 ++---------------------
 server/utils/helpers/chat/responses.js    | 10 ++++--
 2 files changed, 10 insertions(+), 43 deletions(-)

diff --git a/server/utils/AiProviders/liteLLM/index.js b/server/utils/AiProviders/liteLLM/index.js
index 2c7fa823c..28d0b71dc 100644
--- a/server/utils/AiProviders/liteLLM/index.js
+++ b/server/utils/AiProviders/liteLLM/index.js
@@ -1,7 +1,6 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
-  writeResponseChunk,
-  clientAbortedHandler,
+  handleDefaultStreamResponseV2,
 } = require("../../helpers/chat/responses");
 
 class LiteLLM {
@@ -113,45 +112,7 @@ class LiteLLM {
   }
 
   handleStream(response, stream, responseProps) {
-    const { uuid = uuidv4(), sources = [] } = responseProps;
-
-    return new Promise(async (resolve) => {
-      let fullText = "";
-
-      const handleAbort = () => clientAbortedHandler(resolve, fullText);
-      response.on("close", handleAbort);
-
-      for await (const chunk of stream) {
-        const message = chunk?.choices?.[0];
-        const token = message?.delta?.content;
-
-        if (token) {
-          fullText += token;
-          writeResponseChunk(response, {
-            uuid,
-            sources: [],
-            type: "textResponseChunk",
-            textResponse: token,
-            close: false,
-            error: false,
-          });
-        }
-
-        // LiteLLM does not give a finish reason in stream until the final chunk
-        if (message.finish_reason || message.finish_reason === "stop") {
-          writeResponseChunk(response, {
-            uuid,
-            sources,
-            type: "textResponseChunk",
-            textResponse: "",
-            close: true,
-            error: false,
-          });
-          response.removeListener("close", handleAbort);
-          resolve(fullText);
-        }
-      }
-    });
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
   }
 
   // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
diff --git a/server/utils/helpers/chat/responses.js b/server/utils/helpers/chat/responses.js
index d49c8a85a..d07eae308 100644
--- a/server/utils/helpers/chat/responses.js
+++ b/server/utils/helpers/chat/responses.js
@@ -38,8 +38,13 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
       }
 
-      // LocalAi returns '' and others return null.
-      if (message.finish_reason !== "" && message.finish_reason !== null) {
+      // LocalAI returns '' and other endpoints return null for finish_reason on intermediate chunks;
+      // only the final chunk carries a real value, so the `finish_reason` key must be present to mark the ending chunk.
+      if (
+        message?.hasOwnProperty("finish_reason") && // Message is a valid object that exposes finish_reason
+        message.finish_reason !== "" &&
+        message.finish_reason !== null
+      ) {
         writeResponseChunk(response, {
           uuid,
           sources,
@@ -50,6 +55,7 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
         response.removeListener("close", handleAbort);
         resolve(fullText);
+        break; // Break streaming when a valid finish_reason is first encountered
       }
     }
   });
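
For context, an approximate shape of handleDefaultStreamResponseV2 after this patch, reconstructed from the hunks above. The token-handling body and the uuidv4/clientAbortedHandler/writeResponseChunk wiring outside the shown context are assumptions based on the removed LiteLLM handler, not verbatim source:

    // Sketch only: helpers (writeResponseChunk, clientAbortedHandler, uuidv4)
    // are assumed to be defined in responses.js as in the code removed above.
    function handleDefaultStreamResponseV2(response, stream, responseProps) {
      const { uuid = uuidv4(), sources = [] } = responseProps;

      return new Promise(async (resolve) => {
        let fullText = "";
        const handleAbort = () => clientAbortedHandler(resolve, fullText);
        response.on("close", handleAbort);

        for await (const chunk of stream) {
          const message = chunk?.choices?.[0];
          const token = message?.delta?.content;

          // Forward each non-empty token to the client as it arrives.
          if (token) {
            fullText += token;
            writeResponseChunk(response, {
              uuid,
              sources: [],
              type: "textResponseChunk",
              textResponse: token,
              close: false,
              error: false,
            });
          }

          // Close the stream only when a chunk carries a real finish_reason.
          if (
            message?.hasOwnProperty("finish_reason") &&
            message.finish_reason !== "" &&
            message.finish_reason !== null
          ) {
            writeResponseChunk(response, {
              uuid,
              sources,
              type: "textResponseChunk",
              textResponse: "",
              close: true,
              error: false,
            });
            response.removeListener("close", handleAbort);
            resolve(fullText);
            break; // Stop iterating once the ending chunk is handled.
          }
        }
      });
    }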