fix: stream response (#4853)

2026-04-17 02:06:41 +08:00 · 2025-05-21 10:21:20 +08:00
parent aa55f059d4
commit dd3c251603
6 changed files with 342 additions and 313 deletions
--- a/packages/service/core/ai/utils.ts
+++ b/packages/service/core/ai/utils.ts
@@ -18,15 +18,17 @@ import json5 from 'json5';
 */
 export const computedMaxToken = ({
  maxToken,
-  model
+  model,
+  min
 }: {
  maxToken?: number;
  model: LLMModelItemType;
+  min?: number;
 }) => {
  if (maxToken === undefined) return;

  maxToken = Math.min(maxToken, model.maxResponse);
-  return maxToken;
+  return Math.max(maxToken, min || 0);
 };

 // FastGPT temperature range: [0,10], ai temperature:[0,2],{0,1]……
@@ -178,7 +180,7 @@ export const llmStreamResponseToAnswerText = async (
    }
  }
  return {
-    text: parseReasoningContent(answer)[1],
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
    usage,
    toolCalls
  };
@@ -192,8 +194,9 @@ export const llmUnStreamResponseToAnswerText = async (
 }> => {
  const answer = response.choices?.[0]?.message?.content || '';
  const toolCalls = response.choices?.[0]?.message?.tool_calls;
+
  return {
-    text: answer,
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
    usage: response.usage,
    toolCalls
  };
@@ -240,6 +243,12 @@ export const parseLLMStreamResponse = () => {
  let citeBuffer = '';
  const maxCiteBufferLength = 32; // [Object](CITE)总长度为32

+  // Buffers: accumulate finish reason, usage, reasoning and content across parts (read by getResponseData)
+  let buffer_finishReason: CompletionFinishReason = null;
+  let buffer_usage: CompletionUsage = getLLMDefaultUsage();
+  let buffer_reasoningContent = '';
+  let buffer_content = '';
+
  /* 
    parseThinkTag - 只控制是否主动解析 <think></think>，如果接口已经解析了，则不再解析。
    retainDatasetCite - 
@@ -257,6 +266,7 @@ export const parseLLMStreamResponse = () => {
        };
        finish_reason?: CompletionFinishReason;
      }[];
+      usage?: CompletionUsage;
    };
    parseThinkTag?: boolean;
    retainDatasetCite?: boolean;
@@ -266,72 +276,71 @@ export const parseLLMStreamResponse = () => {
    responseContent: string;
    finishReason: CompletionFinishReason;
  } => {
-    const finishReason = part.choices?.[0]?.finish_reason || null;
-    const content = part.choices?.[0]?.delta?.content || '';
-    // @ts-ignore
-    const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
-    const isStreamEnd = !!finishReason;
+    const data = (() => {
+      buffer_usage = part.usage || buffer_usage;

-    // Parse think
-    const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } = (() => {
-      if (reasoningContent || !parseThinkTag) {
-        isInThinkTag = false;
-        return { reasoningContent, content };
-      }
+      const finishReason = part.choices?.[0]?.finish_reason || null;
+      buffer_finishReason = finishReason || buffer_finishReason;

-      if (!content) {
-        return {
-          reasoningContent: '',
-          content: ''
-        };
-      }
+      const content = part.choices?.[0]?.delta?.content || '';
+      // @ts-ignore
+      const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
+      const isStreamEnd = !!buffer_finishReason;

-      // 如果不在 think 标签中，或者有 reasoningContent(接口已解析），则返回 reasoningContent 和 content
-      if (isInThinkTag === false) {
-        return {
-          reasoningContent: '',
-          content
-        };
-      }
+      // Parse think
+      const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } =
+        (() => {
+          if (reasoningContent || !parseThinkTag) {
+            isInThinkTag = false;
+            return { reasoningContent, content };
+          }

-      // 检测是否为 think 标签开头的数据
-      if (isInThinkTag === undefined) {
-        // Parse content think and answer
-        startTagBuffer += content;
-        // 太少内容时候，暂时不解析
-        if (startTagBuffer.length < thinkStartChars.length) {
-          if (isStreamEnd) {
-            const tmpContent = startTagBuffer;
-            startTagBuffer = '';
+          // 如果不在 think 标签中，或者有 reasoningContent(接口已解析），则返回 reasoningContent 和 content
+          if (isInThinkTag === false) {
            return {
              reasoningContent: '',
-              content: tmpContent
+              content
            };
          }
-          return {
-            reasoningContent: '',
-            content: ''
-          };
-        }

-        if (startTagBuffer.startsWith(thinkStartChars)) {
-          isInThinkTag = true;
-          return {
-            reasoningContent: startTagBuffer.slice(thinkStartChars.length),
-            content: ''
-          };
-        }
+          // 检测是否为 think 标签开头的数据
+          if (isInThinkTag === undefined) {
+            // Parse content think and answer
+            startTagBuffer += content;
+            // 太少内容时候，暂时不解析
+            if (startTagBuffer.length < thinkStartChars.length) {
+              if (isStreamEnd) {
+                const tmpContent = startTagBuffer;
+                startTagBuffer = '';
+                return {
+                  reasoningContent: '',
+                  content: tmpContent
+                };
+              }
+              return {
+                reasoningContent: '',
+                content: ''
+              };
+            }

-        // 如果未命中 think 标签，则认为不在 think 标签中，返回 buffer 内容作为 content
-        isInThinkTag = false;
-        return {
-          reasoningContent: '',
-          content: startTagBuffer
-        };
-      }
+            if (startTagBuffer.startsWith(thinkStartChars)) {
+              isInThinkTag = true;
+              return {
+                reasoningContent: startTagBuffer.slice(thinkStartChars.length),
+                content: ''
+              };
+            }

-      // 确认是 think 标签内容，开始返回 think 内容，并实时检测 </think>
-      /* 
+            // 如果未命中 think 标签，则认为不在 think 标签中，返回 buffer 内容作为 content
+            isInThinkTag = false;
+            return {
+              reasoningContent: '',
+              content: startTagBuffer
+            };
+          }
+
+          // 确认是 think 标签内容，开始返回 think 内容，并实时检测 </think>
+          /* 
        检测 </think> 方案。
        存储所有疑似 </think> 的内容，直到检测到完整的 </think> 标签或超出 </think> 长度。
        content 返回值包含以下几种情况:
@@ -342,124 +351,145 @@ export const parseLLMStreamResponse = () => {
          </think>abc - 完全命中尾标签
          k>abc - 命中一部分尾标签
      */
-      // endTagBuffer 专门用来记录疑似尾标签的内容
-      if (endTagBuffer) {
-        endTagBuffer += content;
-        if (endTagBuffer.includes(thinkEndChars)) {
-          isInThinkTag = false;
-          const answer = endTagBuffer.slice(thinkEndChars.length);
-          return {
-            reasoningContent: '',
-            content: answer
-          };
-        } else if (endTagBuffer.length >= thinkEndChars.length) {
-          // 缓存内容超出尾标签长度，且仍未命中 </think>，则认为本次猜测 </think> 失败，仍处于 think 阶段。
-          const tmp = endTagBuffer;
-          endTagBuffer = '';
-          return {
-            reasoningContent: tmp,
-            content: ''
-          };
-        }
-        return {
-          reasoningContent: '',
-          content: ''
-        };
-      } else if (content.includes(thinkEndChars)) {
-        // 返回内容，完整命中</think>，直接结束
-        isInThinkTag = false;
-        const [think, answer] = content.split(thinkEndChars);
-        return {
-          reasoningContent: think,
-          content: answer
-        };
-      } else {
-        // 无 buffer，且未命中 </think>，开始疑似 </think> 检测。
-        for (let i = 1; i < thinkEndChars.length; i++) {
-          const partialEndTag = thinkEndChars.slice(0, i);
-          // 命中一部分尾标签
-          if (content.endsWith(partialEndTag)) {
-            const think = content.slice(0, -partialEndTag.length);
-            endTagBuffer += partialEndTag;
+          // endTagBuffer 专门用来记录疑似尾标签的内容
+          if (endTagBuffer) {
+            endTagBuffer += content;
+            if (endTagBuffer.includes(thinkEndChars)) {
+              isInThinkTag = false;
+              const answer = endTagBuffer.slice(thinkEndChars.length);
+              return {
+                reasoningContent: '',
+                content: answer
+              };
+            } else if (endTagBuffer.length >= thinkEndChars.length) {
+              // 缓存内容超出尾标签长度，且仍未命中 </think>，则认为本次猜测 </think> 失败，仍处于 think 阶段。
+              const tmp = endTagBuffer;
+              endTagBuffer = '';
+              return {
+                reasoningContent: tmp,
+                content: ''
+              };
+            }
            return {
-              reasoningContent: think,
+              reasoningContent: '',
              content: ''
            };
+          } else if (content.includes(thinkEndChars)) {
+            // 返回内容，完整命中</think>，直接结束
+            isInThinkTag = false;
+            const [think, answer] = content.split(thinkEndChars);
+            return {
+              reasoningContent: think,
+              content: answer
+            };
+          } else {
+            // 无 buffer，且未命中 </think>，开始疑似 </think> 检测。
+            for (let i = 1; i < thinkEndChars.length; i++) {
+              const partialEndTag = thinkEndChars.slice(0, i);
+              // 命中一部分尾标签
+              if (content.endsWith(partialEndTag)) {
+                const think = content.slice(0, -partialEndTag.length);
+                endTagBuffer += partialEndTag;
+                return {
+                  reasoningContent: think,
+                  content: ''
+                };
+              }
+            }
          }
-        }
+
+          // 完全未命中尾标签，还是 think 阶段。
+          return {
+            reasoningContent: content,
+            content: ''
+          };
+        })();
+
+      // Parse dataset cite
+      if (retainDatasetCite) {
+        return {
+          reasoningContent: parsedThinkReasoningContent,
+          content: parsedThinkContent,
+          responseContent: parsedThinkContent,
+          finishReason: buffer_finishReason
+        };
      }

-      // 完全未命中尾标签，还是 think 阶段。
-      return {
-        reasoningContent: content,
-        content: ''
-      };
-    })();
+      // 缓存包含 [ 的字符串，直到超出 maxCiteBufferLength 再一次性返回
+      const parseCite = (text: string) => {
+        // 结束时，返回所有剩余内容
+        if (isStreamEnd) {
+          const content = citeBuffer + text;
+          return {
+            content: removeDatasetCiteText(content, false)
+          };
+        }
+
+        // 新内容包含 [，初始化缓冲数据
+        if (text.includes('[')) {
+          const index = text.indexOf('[');
+          const beforeContent = citeBuffer + text.slice(0, index);
+          citeBuffer = text.slice(index);
+
+          // beforeContent 可能是：普通字符串，带 [ 的字符串
+          return {
+            content: removeDatasetCiteText(beforeContent, false)
+          };
+        }
+        // 处于 Cite 缓冲区，判断是否满足条件
+        else if (citeBuffer) {
+          citeBuffer += text;
+
+          // 检查缓冲区长度是否达到完整Quote长度或已经流结束
+          if (citeBuffer.length >= maxCiteBufferLength) {
+            const content = removeDatasetCiteText(citeBuffer, false);
+            citeBuffer = '';
+
+            return {
+              content
+            };
+          } else {
+            // 暂时不返回内容
+            return { content: '' };
+          }
+        }
+
+        return {
+          content: text
+        };
+      };
+      const { content: pasedCiteContent } = parseCite(parsedThinkContent);

-    // Parse datset cite
-    if (retainDatasetCite) {
      return {
        reasoningContent: parsedThinkReasoningContent,
        content: parsedThinkContent,
-        responseContent: parsedThinkContent,
-        finishReason
+        responseContent: pasedCiteContent,
+        finishReason: buffer_finishReason
      };
-    }
+    })();

-    // 缓存包含 [ 的字符串，直到超出 maxCiteBufferLength 再一次性返回
-    const parseCite = (text: string) => {
-      // 结束时，返回所有剩余内容
-      if (isStreamEnd) {
-        const content = citeBuffer + text;
-        return {
-          content: removeDatasetCiteText(content, false)
-        };
-      }
+    buffer_reasoningContent += data.reasoningContent;
+    buffer_content += data.content;

-      // 新内容包含 [，初始化缓冲数据
-      if (text.includes('[')) {
-        const index = text.indexOf('[');
-        const beforeContent = citeBuffer + text.slice(0, index);
-        citeBuffer = text.slice(index);
-
-        // beforeContent 可能是：普通字符串，带 [ 的字符串
-        return {
-          content: removeDatasetCiteText(beforeContent, false)
-        };
-      }
-      // 处于 Cite 缓冲区，判断是否满足条件
-      else if (citeBuffer) {
-        citeBuffer += text;
-
-        // 检查缓冲区长度是否达到完整Quote长度或已经流结束
-        if (citeBuffer.length >= maxCiteBufferLength) {
-          const content = removeDatasetCiteText(citeBuffer, false);
-          citeBuffer = '';
-
-          return {
-            content
-          };
-        } else {
-          // 暂时不返回内容
-          return { content: '' };
-        }
-      }
-
-      return {
-        content: text
-      };
-    };
-    const { content: pasedCiteContent } = parseCite(parsedThinkContent);
+    return data;
+  };

+  const getResponseData = () => {
    return {
-      reasoningContent: parsedThinkReasoningContent,
-      content: parsedThinkContent,
-      responseContent: pasedCiteContent,
-      finishReason
+      finish_reason: buffer_finishReason,
+      usage: buffer_usage,
+      reasoningContent: buffer_reasoningContent,
+      content: buffer_content
    };
  };

+  const updateFinishReason = (finishReason: CompletionFinishReason) => {
+    buffer_finishReason = finishReason;
+  };
+
  return {
-    parsePart
+    parsePart,
+    getResponseData,
+    updateFinishReason
  };
 };