fix: stream response (#4853)

2025-07-21 03:35:36 +00:00 · 2025-05-21 10:21:20 +08:00
parent aa55f059d4
commit dd3c251603
6 changed files with 342 additions and 313 deletions
--- a/docSite/content/zh-cn/docs/development/upgrading/4910.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/4910.md
@@ -19,4 +19,6 @@ weight: 790

 ## 🐛 修复

-1. 全文检索多知识库时排序得分排序不正确
+1. 全文检索多知识库时排序得分排序不正确。
+2. 流响应捕获 finish_reason 可能不正确。
+3. 工具调用模式，未保存思考输出。
--- a/packages/service/core/ai/utils.ts
+++ b/packages/service/core/ai/utils.ts
@@ -18,15 +18,17 @@ import json5 from 'json5';
 */
 export const computedMaxToken = ({
  maxToken,
-  model
+  model,
+  min
 }: {
  maxToken?: number;
  model: LLMModelItemType;
+  min?: number;
 }) => {
  if (maxToken === undefined) return;

  maxToken = Math.min(maxToken, model.maxResponse);
-  return maxToken;
+  return Math.max(maxToken, min || 0);
 };

 // FastGPT temperature range: [0,10], ai temperature:[0,2],{0,1]……
@@ -178,7 +180,7 @@ export const llmStreamResponseToAnswerText = async (
    }
  }
  return {
-    text: parseReasoningContent(answer)[1],
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
    usage,
    toolCalls
  };
@@ -192,8 +194,9 @@ export const llmUnStreamResponseToAnswerText = async (
 }> => {
  const answer = response.choices?.[0]?.message?.content || '';
  const toolCalls = response.choices?.[0]?.message?.tool_calls;
+
  return {
-    text: answer,
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
    usage: response.usage,
    toolCalls
  };
@@ -240,6 +243,12 @@ export const parseLLMStreamResponse = () => {
  let citeBuffer = '';
  const maxCiteBufferLength = 32; // [Object](CITE)总长度为32

+  // Accumulated across all parsed parts; read back via getResponseData
+  let buffer_finishReason: CompletionFinishReason = null;
+  let buffer_usage: CompletionUsage = getLLMDefaultUsage();
+  let buffer_reasoningContent = '';
+  let buffer_content = '';
+
  /* 
    parseThinkTag - 只控制是否主动解析 <think></think>，如果接口已经解析了，则不再解析。
    retainDatasetCite - 
@@ -257,6 +266,7 @@ export const parseLLMStreamResponse = () => {
        };
        finish_reason?: CompletionFinishReason;
      }[];
+      usage?: CompletionUsage;
    };
    parseThinkTag?: boolean;
    retainDatasetCite?: boolean;
@@ -266,72 +276,71 @@ export const parseLLMStreamResponse = () => {
    responseContent: string;
    finishReason: CompletionFinishReason;
  } => {
-    const finishReason = part.choices?.[0]?.finish_reason || null;
-    const content = part.choices?.[0]?.delta?.content || '';
-    // @ts-ignore
-    const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
-    const isStreamEnd = !!finishReason;
+    const data = (() => {
+      buffer_usage = part.usage || buffer_usage;

-    // Parse think
-    const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } = (() => {
-      if (reasoningContent || !parseThinkTag) {
-        isInThinkTag = false;
-        return { reasoningContent, content };
-      }
+      const finishReason = part.choices?.[0]?.finish_reason || null;
+      buffer_finishReason = finishReason || buffer_finishReason;

-      if (!content) {
-        return {
-          reasoningContent: '',
-          content: ''
-        };
-      }
+      const content = part.choices?.[0]?.delta?.content || '';
+      // @ts-ignore
+      const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
+      const isStreamEnd = !!buffer_finishReason;

-      // 如果不在 think 标签中，或者有 reasoningContent(接口已解析），则返回 reasoningContent 和 content
-      if (isInThinkTag === false) {
-        return {
-          reasoningContent: '',
-          content
-        };
-      }
+      // Parse think
+      const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } =
+        (() => {
+          if (reasoningContent || !parseThinkTag) {
+            isInThinkTag = false;
+            return { reasoningContent, content };
+          }

-      // 检测是否为 think 标签开头的数据
-      if (isInThinkTag === undefined) {
-        // Parse content think and answer
-        startTagBuffer += content;
-        // 太少内容时候，暂时不解析
-        if (startTagBuffer.length < thinkStartChars.length) {
-          if (isStreamEnd) {
-            const tmpContent = startTagBuffer;
-            startTagBuffer = '';
+          // 如果不在 think 标签中，或者有 reasoningContent(接口已解析），则返回 reasoningContent 和 content
+          if (isInThinkTag === false) {
            return {
              reasoningContent: '',
-              content: tmpContent
+              content
            };
          }
-          return {
-            reasoningContent: '',
-            content: ''
-          };
-        }

-        if (startTagBuffer.startsWith(thinkStartChars)) {
-          isInThinkTag = true;
-          return {
-            reasoningContent: startTagBuffer.slice(thinkStartChars.length),
-            content: ''
-          };
-        }
+          // 检测是否为 think 标签开头的数据
+          if (isInThinkTag === undefined) {
+            // Parse content think and answer
+            startTagBuffer += content;
+            // 太少内容时候，暂时不解析
+            if (startTagBuffer.length < thinkStartChars.length) {
+              if (isStreamEnd) {
+                const tmpContent = startTagBuffer;
+                startTagBuffer = '';
+                return {
+                  reasoningContent: '',
+                  content: tmpContent
+                };
+              }
+              return {
+                reasoningContent: '',
+                content: ''
+              };
+            }

-        // 如果未命中 think 标签，则认为不在 think 标签中，返回 buffer 内容作为 content
-        isInThinkTag = false;
-        return {
-          reasoningContent: '',
-          content: startTagBuffer
-        };
-      }
+            if (startTagBuffer.startsWith(thinkStartChars)) {
+              isInThinkTag = true;
+              return {
+                reasoningContent: startTagBuffer.slice(thinkStartChars.length),
+                content: ''
+              };
+            }

-      // 确认是 think 标签内容，开始返回 think 内容，并实时检测 </think>
-      /* 
+            // 如果未命中 think 标签，则认为不在 think 标签中，返回 buffer 内容作为 content
+            isInThinkTag = false;
+            return {
+              reasoningContent: '',
+              content: startTagBuffer
+            };
+          }
+
+          // 确认是 think 标签内容，开始返回 think 内容，并实时检测 </think>
+          /* 
        检测 </think> 方案。
        存储所有疑似 </think> 的内容，直到检测到完整的 </think> 标签或超出 </think> 长度。
        content 返回值包含以下几种情况:
@@ -342,124 +351,145 @@ export const parseLLMStreamResponse = () => {
          </think>abc - 完全命中尾标签
          k>abc - 命中一部分尾标签
      */
-      // endTagBuffer 专门用来记录疑似尾标签的内容
-      if (endTagBuffer) {
-        endTagBuffer += content;
-        if (endTagBuffer.includes(thinkEndChars)) {
-          isInThinkTag = false;
-          const answer = endTagBuffer.slice(thinkEndChars.length);
-          return {
-            reasoningContent: '',
-            content: answer
-          };
-        } else if (endTagBuffer.length >= thinkEndChars.length) {
-          // 缓存内容超出尾标签长度，且仍未命中 </think>，则认为本次猜测 </think> 失败，仍处于 think 阶段。
-          const tmp = endTagBuffer;
-          endTagBuffer = '';
-          return {
-            reasoningContent: tmp,
-            content: ''
-          };
-        }
-        return {
-          reasoningContent: '',
-          content: ''
-        };
-      } else if (content.includes(thinkEndChars)) {
-        // 返回内容，完整命中</think>，直接结束
-        isInThinkTag = false;
-        const [think, answer] = content.split(thinkEndChars);
-        return {
-          reasoningContent: think,
-          content: answer
-        };
-      } else {
-        // 无 buffer，且未命中 </think>，开始疑似 </think> 检测。
-        for (let i = 1; i < thinkEndChars.length; i++) {
-          const partialEndTag = thinkEndChars.slice(0, i);
-          // 命中一部分尾标签
-          if (content.endsWith(partialEndTag)) {
-            const think = content.slice(0, -partialEndTag.length);
-            endTagBuffer += partialEndTag;
+          // endTagBuffer 专门用来记录疑似尾标签的内容
+          if (endTagBuffer) {
+            endTagBuffer += content;
+            if (endTagBuffer.includes(thinkEndChars)) {
+              isInThinkTag = false;
+              const answer = endTagBuffer.slice(thinkEndChars.length);
+              return {
+                reasoningContent: '',
+                content: answer
+              };
+            } else if (endTagBuffer.length >= thinkEndChars.length) {
+              // 缓存内容超出尾标签长度，且仍未命中 </think>，则认为本次猜测 </think> 失败，仍处于 think 阶段。
+              const tmp = endTagBuffer;
+              endTagBuffer = '';
+              return {
+                reasoningContent: tmp,
+                content: ''
+              };
+            }
            return {
-              reasoningContent: think,
+              reasoningContent: '',
              content: ''
            };
+          } else if (content.includes(thinkEndChars)) {
+            // 返回内容，完整命中</think>，直接结束
+            isInThinkTag = false;
+            const [think, answer] = content.split(thinkEndChars);
+            return {
+              reasoningContent: think,
+              content: answer
+            };
+          } else {
+            // 无 buffer，且未命中 </think>，开始疑似 </think> 检测。
+            for (let i = 1; i < thinkEndChars.length; i++) {
+              const partialEndTag = thinkEndChars.slice(0, i);
+              // 命中一部分尾标签
+              if (content.endsWith(partialEndTag)) {
+                const think = content.slice(0, -partialEndTag.length);
+                endTagBuffer += partialEndTag;
+                return {
+                  reasoningContent: think,
+                  content: ''
+                };
+              }
+            }
          }
-        }
+
+          // 完全未命中尾标签，还是 think 阶段。
+          return {
+            reasoningContent: content,
+            content: ''
+          };
+        })();
+
+      // Parse dataset cite
+      if (retainDatasetCite) {
+        return {
+          reasoningContent: parsedThinkReasoningContent,
+          content: parsedThinkContent,
+          responseContent: parsedThinkContent,
+          finishReason: buffer_finishReason
+        };
      }

-      // 完全未命中尾标签，还是 think 阶段。
-      return {
-        reasoningContent: content,
-        content: ''
-      };
-    })();
+      // Hold back any text starting at '[' until the buffer reaches maxCiteBufferLength, then emit it at once.
+      // Everything that was held back is passed through removeDatasetCiteText before it is emitted.
+      const parseCite = (text: string) => {
+        // Stream finished: flush everything that is still buffered.
+        if (isStreamEnd) {
+          const content = citeBuffer + text;
+          // isStreamEnd stays true for every later part (e.g. a trailing usage-only chunk),
+          // so clear the buffer here; otherwise the same text would be emitted again.
+          citeBuffer = '';
+          return { content: removeDatasetCiteText(content, false) };
+        }
+
+        // New text contains '[': restart buffering from that character.
+        if (text.includes('[')) {
+          const index = text.indexOf('[');
+          const beforeContent = citeBuffer + text.slice(0, index);
+          citeBuffer = text.slice(index);
+
+          // beforeContent is the previously buffered text plus everything before the new '['.
+          return {
+            content: removeDatasetCiteText(beforeContent, false)
+          };
+        }
+        // Already buffering: keep appending until the buffer is long enough to decide.
+        else if (citeBuffer) {
+          citeBuffer += text;
+
+          // Buffer reached maxCiteBufferLength: emit it and reset.
+          if (citeBuffer.length >= maxCiteBufferLength) {
+            const content = removeDatasetCiteText(citeBuffer, false);
+            citeBuffer = '';
+
+            return { content };
+          }
+          // Not long enough yet: emit nothing for this part.
+          return { content: '' };
+        }
+
+        // No '[' seen and nothing buffered: pass the text through unchanged.
+        return {
+          content: text
+        };
+      };
+      const { content: pasedCiteContent } = parseCite(parsedThinkContent);

-    // Parse datset cite
-    if (retainDatasetCite) {
      return {
        reasoningContent: parsedThinkReasoningContent,
        content: parsedThinkContent,
-        responseContent: parsedThinkContent,
-        finishReason
+        responseContent: pasedCiteContent,
+        finishReason: buffer_finishReason
      };
-    }
+    })();

-    // 缓存包含 [ 的字符串，直到超出 maxCiteBufferLength 再一次性返回
-    const parseCite = (text: string) => {
-      // 结束时，返回所有剩余内容
-      if (isStreamEnd) {
-        const content = citeBuffer + text;
-        return {
-          content: removeDatasetCiteText(content, false)
-        };
-      }
+    buffer_reasoningContent += data.reasoningContent;
+    buffer_content += data.content;

-      // 新内容包含 [，初始化缓冲数据
-      if (text.includes('[')) {
-        const index = text.indexOf('[');
-        const beforeContent = citeBuffer + text.slice(0, index);
-        citeBuffer = text.slice(index);
-
-        // beforeContent 可能是：普通字符串，带 [ 的字符串
-        return {
-          content: removeDatasetCiteText(beforeContent, false)
-        };
-      }
-      // 处于 Cite 缓冲区，判断是否满足条件
-      else if (citeBuffer) {
-        citeBuffer += text;
-
-        // 检查缓冲区长度是否达到完整Quote长度或已经流结束
-        if (citeBuffer.length >= maxCiteBufferLength) {
-          const content = removeDatasetCiteText(citeBuffer, false);
-          citeBuffer = '';
-
-          return {
-            content
-          };
-        } else {
-          // 暂时不返回内容
-          return { content: '' };
-        }
-      }
-
-      return {
-        content: text
-      };
-    };
-    const { content: pasedCiteContent } = parseCite(parsedThinkContent);
+    return data;
+  };

+  const getResponseData = () => {
    return {
-      reasoningContent: parsedThinkReasoningContent,
-      content: parsedThinkContent,
-      responseContent: pasedCiteContent,
-      finishReason
+      finish_reason: buffer_finishReason,
+      usage: buffer_usage,
+      reasoningContent: buffer_reasoningContent,
+      content: buffer_content
    };
  };

+  const updateFinishReason = (finishReason: CompletionFinishReason) => {
+    buffer_finishReason = finishReason;
+  };
+
  return {
-    parsePart
+    parsePart,
+    getResponseData,
+    updateFinishReason
  };
 };
--- a/packages/service/core/workflow/dispatch/agent/runTool/functionCall.ts
+++ b/packages/service/core/workflow/dispatch/agent/runTool/functionCall.ts
@@ -1,13 +1,14 @@
 import { createChatCompletion } from '../../../../ai/config';
 import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
-import {
-  type ChatCompletion,
-  type StreamChatType,
-  type ChatCompletionMessageParam,
-  type ChatCompletionCreateParams,
-  type ChatCompletionMessageFunctionCall,
-  type ChatCompletionFunctionMessageParam,
-  type ChatCompletionAssistantMessageParam
+import type {
+  ChatCompletion,
+  StreamChatType,
+  ChatCompletionMessageParam,
+  ChatCompletionCreateParams,
+  ChatCompletionMessageFunctionCall,
+  ChatCompletionFunctionMessageParam,
+  ChatCompletionAssistantMessageParam,
+  CompletionFinishReason
 } from '@fastgpt/global/core/ai/type.d';
 import { type NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
@@ -259,14 +260,15 @@ export const runToolWithFunctionCall = async (
    }
  });

-  let { answer, functionCalls, inputTokens, outputTokens } = await (async () => {
+  let { answer, functionCalls, inputTokens, outputTokens, finish_reason } = await (async () => {
    if (isStreamResponse) {
      if (!res || res.closed) {
        return {
          answer: '',
          functionCalls: [],
          inputTokens: 0,
-          outputTokens: 0
+          outputTokens: 0,
+          finish_reason: 'close' as const
        };
      }
      const result = await streamResponse({
@@ -281,10 +283,12 @@ export const runToolWithFunctionCall = async (
        answer: result.answer,
        functionCalls: result.functionCalls,
        inputTokens: result.usage.prompt_tokens,
-        outputTokens: result.usage.completion_tokens
+        outputTokens: result.usage.completion_tokens,
+        finish_reason: result.finish_reason
      };
    } else {
      const result = aiResponse as ChatCompletion;
+      const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
      const function_call = result.choices?.[0]?.message?.function_call;
      const usage = result.usage;

@@ -315,7 +319,8 @@ export const runToolWithFunctionCall = async (
        answer,
        functionCalls: toolCalls,
        inputTokens: usage?.prompt_tokens,
-        outputTokens: usage?.completion_tokens
+        outputTokens: usage?.completion_tokens,
+        finish_reason
      };
    }
  })();
@@ -481,7 +486,8 @@ export const runToolWithFunctionCall = async (
        completeMessages,
        assistantResponses: toolNodeAssistants,
        runTimes,
-        toolWorkflowInteractiveResponse
+        toolWorkflowInteractiveResponse,
+        finish_reason
      };
    }

@@ -495,7 +501,8 @@ export const runToolWithFunctionCall = async (
        toolNodeInputTokens,
        toolNodeOutputTokens,
        assistantResponses: toolNodeAssistants,
-        runTimes
+        runTimes,
+        finish_reason
      }
    );
  } else {
@@ -523,7 +530,8 @@ export const runToolWithFunctionCall = async (
        : outputTokens,
      completeMessages,
      assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
-      runTimes: (response?.runTimes || 0) + 1
+      runTimes: (response?.runTimes || 0) + 1,
+      finish_reason
    };
  }
 };
@@ -546,28 +554,25 @@ async function streamResponse({
    readStream: stream
  });

-  let textAnswer = '';
  let functionCalls: ChatCompletionMessageFunctionCall[] = [];
  let functionId = getNanoid();
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

  for await (const part of stream) {
-    usage = part.usage || usage;
    if (res.closed) {
      stream.controller?.abort();
+      updateFinishReason('close');
      break;
    }

-    const { content: toolChoiceContent, responseContent } = parsePart({
+    const { responseContent } = parsePart({
      part,
      parseThinkTag: false,
      retainDatasetCite
    });

    const responseChoice = part.choices?.[0]?.delta;
-    textAnswer += toolChoiceContent;

    if (responseContent) {
      workflowStreamResponse?.({
@@ -577,7 +582,7 @@ async function streamResponse({
          text: responseContent
        })
      });
-    } else if (responseChoice.function_call) {
+    } else if (responseChoice?.function_call) {
      const functionCall: {
        arguments?: string;
        name?: string;
@@ -640,5 +645,7 @@ async function streamResponse({
    }
  }

-  return { answer: textAnswer, functionCalls, usage };
+  const { content, finish_reason, usage } = getResponseData();
+
+  return { answer: content, functionCalls, finish_reason, usage };
 }
--- a/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts
+++ b/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts
@@ -220,7 +220,8 @@ export const runToolWithPromptCall = async (

  const max_tokens = computedMaxToken({
    model: toolModel,
-    maxToken
+    maxToken,
+    min: 100
  });
  const filterMessages = await filterGPTMessageByMaxContext({
    messages,
@@ -592,28 +593,22 @@ async function streamResponse({

  let startResponseWrite = false;
  let answer = '';
-  let reasoning = '';
-  let finish_reason: CompletionFinishReason = null;
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

  for await (const part of stream) {
-    usage = part.usage || usage;
    if (res.closed) {
      stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
      break;
    }

-    const { reasoningContent, content, responseContent, finishReason } = parsePart({
+    const { reasoningContent, content, responseContent } = parsePart({
      part,
      parseThinkTag: aiChatReasoning,
      retainDatasetCite
    });
-    finish_reason = finish_reason || finishReason;
    answer += content;
-    reasoning += reasoningContent;

    // Reasoning response
    if (aiChatReasoning && reasoningContent) {
@@ -658,7 +653,9 @@ async function streamResponse({
    }
  }

-  return { answer, reasoning, finish_reason, usage };
+  const { reasoningContent, content, finish_reason, usage } = getResponseData();
+
+  return { answer: content, reasoning: reasoningContent, finish_reason, usage };
 }

 const parseAnswer = (
--- a/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts
+++ b/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts
@@ -7,17 +7,13 @@ import {
  type ChatCompletionToolMessageParam,
  type ChatCompletionMessageParam,
  type ChatCompletionTool,
-  type ChatCompletionAssistantMessageParam,
  type CompletionFinishReason
 } from '@fastgpt/global/core/ai/type';
 import { type NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
 import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
 import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
-import {
-  ChatCompletionRequestMessageRoleEnum,
-  getLLMDefaultUsage
-} from '@fastgpt/global/core/ai/constants';
+import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
 import { dispatchWorkFlow } from '../../index';
 import {
  type DispatchToolModuleProps,
@@ -254,7 +250,8 @@ export const runToolWithToolChoice = async (

  const max_tokens = computedMaxToken({
    model: toolModel,
-    maxToken
+    maxToken,
+    min: 100
  });

  // Filter histories by maxToken
@@ -319,97 +316,101 @@ export const runToolWithToolChoice = async (
    }
  });

-  let { answer, toolCalls, finish_reason, inputTokens, outputTokens } = await (async () => {
-    if (isStreamResponse) {
-      if (!res || res.closed) {
-        return {
-          answer: '',
-          toolCalls: [],
-          finish_reason: 'close' as const,
-          inputTokens: 0,
-          outputTokens: 0
-        };
-      }
+  let { reasoningContent, answer, toolCalls, finish_reason, inputTokens, outputTokens } =
+    await (async () => {
+      if (isStreamResponse) {
+        if (!res || res.closed) {
+          return {
+            reasoningContent: '',
+            answer: '',
+            toolCalls: [],
+            finish_reason: 'close' as const,
+            inputTokens: 0,
+            outputTokens: 0
+          };
+        }

-      const result = await streamResponse({
-        res,
-        workflowStreamResponse,
-        toolNodes,
-        stream: aiResponse,
-        aiChatReasoning,
-        retainDatasetCite
-      });
-
-      return {
-        answer: result.answer,
-        toolCalls: result.toolCalls,
-        finish_reason: result.finish_reason,
-        inputTokens: result.usage.prompt_tokens,
-        outputTokens: result.usage.completion_tokens
-      };
-    } else {
-      const result = aiResponse as ChatCompletion;
-      const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
-      const calls = result.choices?.[0]?.message?.tool_calls || [];
-      const answer = result.choices?.[0]?.message?.content || '';
-      // @ts-ignore
-      const reasoningContent = result.choices?.[0]?.message?.reasoning_content || '';
-      const usage = result.usage;
-
-      if (aiChatReasoning && reasoningContent) {
-        workflowStreamResponse?.({
-          event: SseResponseEventEnum.fastAnswer,
-          data: textAdaptGptResponse({
-            reasoning_content: removeDatasetCiteText(reasoningContent, retainDatasetCite)
-          })
+        const result = await streamResponse({
+          res,
+          workflowStreamResponse,
+          toolNodes,
+          stream: aiResponse,
+          aiChatReasoning,
+          retainDatasetCite
        });
-      }

-      // 格式化 toolCalls
-      const toolCalls = calls.map((tool) => {
-        const toolNode = toolNodes.find((item) => item.nodeId === tool.function?.name);
+        return {
+          reasoningContent: result.reasoningContent,
+          answer: result.answer,
+          toolCalls: result.toolCalls,
+          finish_reason: result.finish_reason,
+          inputTokens: result.usage.prompt_tokens,
+          outputTokens: result.usage.completion_tokens
+        };
+      } else {
+        const result = aiResponse as ChatCompletion;
+        const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
+        const calls = result.choices?.[0]?.message?.tool_calls || [];
+        const answer = result.choices?.[0]?.message?.content || '';
+        // @ts-ignore
+        const reasoningContent = result.choices?.[0]?.message?.reasoning_content || '';
+        const usage = result.usage;

-        // 不支持 stream 模式的模型的这里需要补一个响应给客户端
-        workflowStreamResponse?.({
-          event: SseResponseEventEnum.toolCall,
-          data: {
-            tool: {
-              id: tool.id,
-              toolName: toolNode?.name || '',
-              toolAvatar: toolNode?.avatar || '',
-              functionName: tool.function.name,
-              params: tool.function?.arguments ?? '',
-              response: ''
+        if (aiChatReasoning && reasoningContent) {
+          workflowStreamResponse?.({
+            event: SseResponseEventEnum.fastAnswer,
+            data: textAdaptGptResponse({
+              reasoning_content: removeDatasetCiteText(reasoningContent, retainDatasetCite)
+            })
+          });
+        }
+
+        // 格式化 toolCalls
+        const toolCalls = calls.map((tool) => {
+          const toolNode = toolNodes.find((item) => item.nodeId === tool.function?.name);
+
+          // 不支持 stream 模式的模型的这里需要补一个响应给客户端
+          workflowStreamResponse?.({
+            event: SseResponseEventEnum.toolCall,
+            data: {
+              tool: {
+                id: tool.id,
+                toolName: toolNode?.name || '',
+                toolAvatar: toolNode?.avatar || '',
+                functionName: tool.function.name,
+                params: tool.function?.arguments ?? '',
+                response: ''
+              }
            }
-          }
+          });
+
+          return {
+            ...tool,
+            toolName: toolNode?.name || '',
+            toolAvatar: toolNode?.avatar || ''
+          };
        });

+        if (answer) {
+          workflowStreamResponse?.({
+            event: SseResponseEventEnum.fastAnswer,
+            data: textAdaptGptResponse({
+              text: removeDatasetCiteText(answer, retainDatasetCite)
+            })
+          });
+        }
+
        return {
-          ...tool,
-          toolName: toolNode?.name || '',
-          toolAvatar: toolNode?.avatar || ''
+          reasoningContent: (reasoningContent as string) || '',
+          answer,
+          toolCalls: toolCalls,
+          finish_reason,
+          inputTokens: usage?.prompt_tokens,
+          outputTokens: usage?.completion_tokens
        };
-      });
-
-      if (answer) {
-        workflowStreamResponse?.({
-          event: SseResponseEventEnum.fastAnswer,
-          data: textAdaptGptResponse({
-            text: removeDatasetCiteText(answer, retainDatasetCite)
-          })
-        });
      }
-
-      return {
-        answer,
-        toolCalls: toolCalls,
-        finish_reason,
-        inputTokens: usage?.prompt_tokens,
-        outputTokens: usage?.completion_tokens
-      };
-    }
-  })();
-  if (!answer && toolCalls.length === 0) {
+    })();
+  if (!answer && !reasoningContent && toolCalls.length === 0) {
    return Promise.reject(getEmptyResponseTip());
  }

@@ -501,12 +502,13 @@ export const runToolWithToolChoice = async (

  if (toolCalls.length > 0) {
    // Run the tool, combine its results, and perform another round of AI calls
-    const assistantToolMsgParams: ChatCompletionAssistantMessageParam[] = [
-      ...(answer
+    const assistantToolMsgParams: ChatCompletionMessageParam[] = [
+      ...(answer || reasoningContent
        ? [
            {
              role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
-              content: answer
+              content: answer,
+              reasoning_text: reasoningContent
            }
          ]
        : []),
@@ -627,9 +629,10 @@ export const runToolWithToolChoice = async (
    );
  } else {
    // No tool is invoked, indicating that the process is over
-    const gptAssistantResponse: ChatCompletionAssistantMessageParam = {
+    const gptAssistantResponse: ChatCompletionMessageParam = {
      role: ChatCompletionRequestMessageRoleEnum.Assistant,
-      content: answer
+      content: answer,
+      reasoning_text: reasoningContent
    };
    const completeMessages = filterMessages.concat(gptAssistantResponse);
    inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools));
@@ -671,34 +674,23 @@ async function streamResponse({
    readStream: stream
  });

-  let textAnswer = '';
  let callingTool: { name: string; arguments: string } | null = null;
  let toolCalls: ChatCompletionMessageToolCall[] = [];
-  let finish_reason: CompletionFinishReason = null;
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

  for await (const part of stream) {
-    usage = part.usage || usage;
    if (res.closed) {
      stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
      break;
    }

-    const {
-      reasoningContent,
-      content: toolChoiceContent,
-      responseContent,
-      finishReason
-    } = parsePart({
+    const { reasoningContent, responseContent } = parsePart({
      part,
      parseThinkTag: true,
      retainDatasetCite
    });
-    textAnswer += toolChoiceContent;
-    finish_reason = finishReason || finish_reason;

    const responseChoice = part.choices?.[0]?.delta;

@@ -800,5 +792,13 @@ async function streamResponse({
    }
  }

-  return { answer: textAnswer, toolCalls: toolCalls.filter(Boolean), finish_reason, usage };
+  const { reasoningContent, content, finish_reason, usage } = getResponseData();
+
+  return {
+    reasoningContent,
+    answer: content,
+    toolCalls: toolCalls.filter(Boolean),
+    finish_reason,
+    usage
+  };
 }
--- a/packages/service/core/workflow/dispatch/chat/oneapi.ts
+++ b/packages/service/core/workflow/dispatch/chat/oneapi.ts
@@ -556,30 +556,21 @@ async function streamResponse({
    res,
    readStream: stream
  });
-  let answer = '';
-  let reasoning = '';
-  let finish_reason: CompletionFinishReason = null;
-  let usage: CompletionUsage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

  for await (const part of stream) {
-    usage = part.usage || usage;
-
    if (res.closed) {
      stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
      break;
    }

-    const { reasoningContent, content, responseContent, finishReason } = parsePart({
+    const { reasoningContent, responseContent } = parsePart({
      part,
      parseThinkTag,
      retainDatasetCite
    });
-    finish_reason = finish_reason || finishReason;
-    answer += content;
-    reasoning += reasoningContent;

    if (aiChatReasoning && reasoningContent) {
      workflowStreamResponse?.({
@@ -602,5 +593,7 @@ async function streamResponse({
    }
  }

+  const { reasoningContent: reasoning, content: answer, finish_reason, usage } = getResponseData();
+
  return { answer, reasoning, finish_reason, usage };
 }