fix: stream response (#4853)

This commit is contained in:
Archer
2025-05-21 10:21:20 +08:00
committed by GitHub
parent aa55f059d4
commit dd3c251603
6 changed files with 342 additions and 313 deletions

View File

@@ -19,4 +19,6 @@ weight: 790
 ## 🐛 Fixes
 1. Incorrect score ordering when running full-text search across multiple knowledge bases
+2. The finish_reason captured from streaming responses could be incorrect.
+3. Tool-call mode did not save the reasoning output.
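Background for item 2: in OpenAI-compatible streams the finish_reason can arrive on a chunk that is not the last one (for example when a trailing usage-only chunk follows), so reading it from whichever chunk happens to be last may yield null. A minimal, illustrative sketch of the buffering idea used below (names are hypothetical, not FastGPT's API):

// Illustrative sketch only: remember the last non-null finish_reason across chunks.
type FinishReason = 'stop' | 'length' | 'tool_calls' | 'close' | null;

let bufferedFinishReason: FinishReason = null;

const onChunk = (chunk: { choices?: { finish_reason?: FinishReason }[] }) => {
  // A usage-only chunk has no choices; without buffering, it would make the
  // caller report finish_reason as null even though one was already seen.
  bufferedFinishReason = chunk.choices?.[0]?.finish_reason ?? bufferedFinishReason;
};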

View File

@@ -18,15 +18,17 @@ import json5 from 'json5';
  */
 export const computedMaxToken = ({
   maxToken,
-  model
+  model,
+  min
 }: {
   maxToken?: number;
   model: LLMModelItemType;
+  min?: number;
 }) => {
   if (maxToken === undefined) return;
   maxToken = Math.min(maxToken, model.maxResponse);
-  return maxToken;
+  return Math.max(maxToken, min || 0);
 };

 // FastGPT temperature range: [0,10], ai temperature:[0,2],{0,1]……
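A hedged usage sketch of the updated helper: the requested value is capped at the model's maxResponse and, when min is passed, floored so tool-call prompts keep a workable token budget. requestedMaxToken is an illustrative variable; min: 100 matches the tool-call changes further down.

// Sketch: maxToken is clamped to [min, model.maxResponse].
// e.g. model.maxResponse = 4096, requestedMaxToken = 16000 -> 4096
//      requestedMaxToken = 10, min = 100                   -> 100
const max_tokens = computedMaxToken({
  maxToken: requestedMaxToken, // undefined -> helper returns undefined
  model: toolModel,
  min: 100
});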
@@ -178,7 +180,7 @@ export const llmStreamResponseToAnswerText = async (
     }
   }
   return {
-    text: parseReasoningContent(answer)[1],
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
     usage,
     toolCalls
   };
@@ -192,8 +194,9 @@ export const llmUnStreamResponseToAnswerText = async (
 }> => {
   const answer = response.choices?.[0]?.message?.content || '';
   const toolCalls = response.choices?.[0]?.message?.tool_calls;
   return {
-    text: answer,
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
     usage: response.usage,
     toolCalls
   };
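For reference, the non-stream path now runs the answer through the same two steps as the stream path: split off any <think> reasoning, then strip dataset citation markers. A simplified sketch of the splitting behaviour this relies on (parseReasoningContentSketch is an illustrative name, not the actual implementation):

// Simplified sketch: [reasoning, visible answer] from a leading <think> block.
const parseReasoningContentSketch = (text: string): [string, string] => {
  const match = text.match(/^<think>([\s\S]*?)<\/think>([\s\S]*)$/);
  return match ? [match[1], match[2]] : ['', text];
};

// parseReasoningContentSketch('<think>plan...</think>Hello')[1] === 'Hello'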
@@ -240,6 +243,12 @@ export const parseLLMStreamResponse = () => {
   let citeBuffer = '';
   const maxCiteBufferLength = 32; // [Object](CITE) is 32 characters long in total

+  // Buffer
+  let buffer_finishReason: CompletionFinishReason = null;
+  let buffer_usage: CompletionUsage = getLLMDefaultUsage();
+  let buffer_reasoningContent = '';
+  let buffer_content = '';
+
   /*
     parseThinkTag - only controls whether we actively parse <think></think>; if the API has already parsed it, we do not parse it again.
     retainDatasetCite -
@@ -257,6 +266,7 @@ export const parseLLMStreamResponse = () => {
       };
       finish_reason?: CompletionFinishReason;
     }[];
+    usage?: CompletionUsage;
   };
   parseThinkTag?: boolean;
   retainDatasetCite?: boolean;
@@ -266,72 +276,71 @@ export const parseLLMStreamResponse = () => {
    responseContent: string;
    finishReason: CompletionFinishReason;
  } => {
    const data = (() => {
      buffer_usage = part.usage || buffer_usage;

      const finishReason = part.choices?.[0]?.finish_reason || null;
      buffer_finishReason = finishReason || buffer_finishReason;

      const content = part.choices?.[0]?.delta?.content || '';
      // @ts-ignore
      const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
      const isStreamEnd = !!buffer_finishReason;

      // Parse think
      const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } =
        (() => {
          if (reasoningContent || !parseThinkTag) {
            isInThinkTag = false;
            return { reasoningContent, content };
          }

          // Not inside a think tag (or the API already returned reasoningContent): pass content through
          if (isInThinkTag === false) {
            return {
              reasoningContent: '',
              content
            };
          }

          // Detect whether the data starts with the think tag
          if (isInThinkTag === undefined) {
            // Parse content think and answer
            startTagBuffer += content;

            // Too little content so far; do not parse yet
            if (startTagBuffer.length < thinkStartChars.length) {
              if (isStreamEnd) {
                const tmpContent = startTagBuffer;
                startTagBuffer = '';
                return {
                  reasoningContent: '',
                  content: tmpContent
                };
              }
              return {
                reasoningContent: '',
                content: ''
              };
            }

            if (startTagBuffer.startsWith(thinkStartChars)) {
              isInThinkTag = true;
              return {
                reasoningContent: startTagBuffer.slice(thinkStartChars.length),
                content: ''
              };
            }

            // No think tag matched: treat it as not being in a think tag and return the buffered content as content
            isInThinkTag = false;
            return {
              reasoningContent: '',
              content: startTagBuffer
            };
          }

          // Confirmed think-tag content: return it as reasoning while watching for </think> in real time
          /*
            </think> detection scheme:
            buffer everything that looks like </think> until the full </think> tag is detected or the buffer exceeds the tag length.
            The returned content covers the following cases:
@@ -342,124 +351,145 @@ export const parseLLMStreamResponse = () => {
              </think>abc - the end tag is fully matched
              k>abc - only part of the end tag is matched
          */
          // endTagBuffer records content suspected to be part of the end tag
          if (endTagBuffer) {
            endTagBuffer += content;
            if (endTagBuffer.includes(thinkEndChars)) {
              isInThinkTag = false;
              const answer = endTagBuffer.slice(thinkEndChars.length);
              return {
                reasoningContent: '',
                content: answer
              };
            } else if (endTagBuffer.length >= thinkEndChars.length) {
              // The buffer exceeds the end-tag length without matching </think>: this guess failed, still in the think phase.
              const tmp = endTagBuffer;
              endTagBuffer = '';
              return {
                reasoningContent: tmp,
                content: ''
              };
            }
            return {
              reasoningContent: '',
              content: ''
            };
          } else if (content.includes(thinkEndChars)) {
            // The content fully contains </think>: close the think phase immediately
            isInThinkTag = false;
            const [think, answer] = content.split(thinkEndChars);
            return {
              reasoningContent: think,
              content: answer
            };
          } else {
            // No buffer and no full </think> match: start checking for a partial </think>.
            for (let i = 1; i < thinkEndChars.length; i++) {
              const partialEndTag = thinkEndChars.slice(0, i);
              // Matched part of the end tag
              if (content.endsWith(partialEndTag)) {
                const think = content.slice(0, -partialEndTag.length);
                endTagBuffer += partialEndTag;
                return {
                  reasoningContent: think,
                  content: ''
                };
              }
            }
          }

          // No end tag matched at all: still in the think phase.
          return {
            reasoningContent: content,
            content: ''
          };
        })();

      // Parse dataset cite
      if (retainDatasetCite) {
        return {
          reasoningContent: parsedThinkReasoningContent,
          content: parsedThinkContent,
          responseContent: parsedThinkContent,
          finishReason: buffer_finishReason
        };
      }

      // Buffer strings containing [ and only flush them once the buffer exceeds maxCiteBufferLength
      const parseCite = (text: string) => {
        // On stream end, return all remaining content
        if (isStreamEnd) {
          const content = citeBuffer + text;
          return {
            content: removeDatasetCiteText(content, false)
          };
        }

        // New content contains [: initialize the buffer
        if (text.includes('[')) {
          const index = text.indexOf('[');
          const beforeContent = citeBuffer + text.slice(0, index);
          citeBuffer = text.slice(index);

          // beforeContent may be a plain string or a string containing [
          return {
            content: removeDatasetCiteText(beforeContent, false)
          };
        }
        // Already buffering a cite: check whether the flush condition is met
        else if (citeBuffer) {
          citeBuffer += text;

          // Check whether the buffer has reached the full cite length or the stream has ended
          if (citeBuffer.length >= maxCiteBufferLength) {
            const content = removeDatasetCiteText(citeBuffer, false);
            citeBuffer = '';
            return {
              content
            };
          } else {
            // Do not return content yet
            return { content: '' };
          }
        }

        return {
          content: text
        };
      };
      const { content: pasedCiteContent } = parseCite(parsedThinkContent);

      return {
        reasoningContent: parsedThinkReasoningContent,
        content: parsedThinkContent,
        responseContent: pasedCiteContent,
        finishReason: buffer_finishReason
      };
    })();

    buffer_reasoningContent += data.reasoningContent;
    buffer_content += data.content;

    return data;
  };

  const getResponseData = () => {
    return {
      finish_reason: buffer_finishReason,
      usage: buffer_usage,
      reasoningContent: buffer_reasoningContent,
      content: buffer_content
    };
  };

  const updateFinishReason = (finishReason: CompletionFinishReason) => {
    buffer_finishReason = finishReason;
  };

  return {
    parsePart,
    getResponseData,
    updateFinishReason
  };
};
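The callers below all follow the same pattern with these new return values: feed every chunk to parsePart, record a client disconnect with updateFinishReason('close'), and read the buffered totals from getResponseData() after the loop. A condensed sketch of that contract (stream, res and retainDatasetCite as in the surrounding files):

const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

for await (const part of stream) {
  if (res.closed) {
    stream.controller?.abort();
    updateFinishReason('close'); // record why the stream ended
    break;
  }

  const { reasoningContent, responseContent } = parsePart({
    part,
    parseThinkTag: true,
    retainDatasetCite
  });
  // ...forward reasoningContent / responseContent to the client as SSE events...
}

// Accumulation now lives inside the parser; callers no longer track usage or finish_reason themselves.
const { reasoningContent, content, finish_reason, usage } = getResponseData();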

View File

@@ -1,13 +1,14 @@
 import { createChatCompletion } from '../../../../ai/config';
 import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
-import {
-  type ChatCompletion,
-  type StreamChatType,
-  type ChatCompletionMessageParam,
-  type ChatCompletionCreateParams,
-  type ChatCompletionMessageFunctionCall,
-  type ChatCompletionFunctionMessageParam,
-  type ChatCompletionAssistantMessageParam
+import type {
+  ChatCompletion,
+  StreamChatType,
+  ChatCompletionMessageParam,
+  ChatCompletionCreateParams,
+  ChatCompletionMessageFunctionCall,
+  ChatCompletionFunctionMessageParam,
+  ChatCompletionAssistantMessageParam,
+  CompletionFinishReason
 } from '@fastgpt/global/core/ai/type.d';
 import { type NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
@@ -259,14 +260,15 @@ export const runToolWithFunctionCall = async (
     }
   });

-  let { answer, functionCalls, inputTokens, outputTokens } = await (async () => {
+  let { answer, functionCalls, inputTokens, outputTokens, finish_reason } = await (async () => {
     if (isStreamResponse) {
       if (!res || res.closed) {
         return {
           answer: '',
           functionCalls: [],
           inputTokens: 0,
-          outputTokens: 0
+          outputTokens: 0,
+          finish_reason: 'close' as const
         };
       }
       const result = await streamResponse({
@@ -281,10 +283,12 @@ export const runToolWithFunctionCall = async (
         answer: result.answer,
         functionCalls: result.functionCalls,
         inputTokens: result.usage.prompt_tokens,
-        outputTokens: result.usage.completion_tokens
+        outputTokens: result.usage.completion_tokens,
+        finish_reason: result.finish_reason
       };
     } else {
       const result = aiResponse as ChatCompletion;
+      const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
       const function_call = result.choices?.[0]?.message?.function_call;
       const usage = result.usage;
@@ -315,7 +319,8 @@ export const runToolWithFunctionCall = async (
         answer,
         functionCalls: toolCalls,
         inputTokens: usage?.prompt_tokens,
-        outputTokens: usage?.completion_tokens
+        outputTokens: usage?.completion_tokens,
+        finish_reason
       };
     }
   })();
@@ -481,7 +486,8 @@ export const runToolWithFunctionCall = async (
       completeMessages,
       assistantResponses: toolNodeAssistants,
       runTimes,
-      toolWorkflowInteractiveResponse
+      toolWorkflowInteractiveResponse,
+      finish_reason
     };
   }
@@ -495,7 +501,8 @@ export const runToolWithFunctionCall = async (
         toolNodeInputTokens,
         toolNodeOutputTokens,
         assistantResponses: toolNodeAssistants,
-        runTimes
+        runTimes,
+        finish_reason
       }
     );
   } else {
@@ -523,7 +530,8 @@ export const runToolWithFunctionCall = async (
         : outputTokens,
       completeMessages,
       assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
-      runTimes: (response?.runTimes || 0) + 1
+      runTimes: (response?.runTimes || 0) + 1,
+      finish_reason
     };
   }
 };
@@ -546,28 +554,25 @@ async function streamResponse({
     readStream: stream
   });

-  let textAnswer = '';
   let functionCalls: ChatCompletionMessageFunctionCall[] = [];
   let functionId = getNanoid();
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
+      updateFinishReason('close');
       break;
     }

-    const { content: toolChoiceContent, responseContent } = parsePart({
+    const { responseContent } = parsePart({
       part,
       parseThinkTag: false,
       retainDatasetCite
     });
     const responseChoice = part.choices?.[0]?.delta;
-    textAnswer += toolChoiceContent;

     if (responseContent) {
       workflowStreamResponse?.({
@@ -577,7 +582,7 @@ async function streamResponse({
           text: responseContent
         })
       });
-    } else if (responseChoice.function_call) {
+    } else if (responseChoice?.function_call) {
       const functionCall: {
         arguments?: string;
         name?: string;
@@ -640,5 +645,7 @@ async function streamResponse({
     }
   }

-  return { answer: textAnswer, functionCalls, usage };
+  const { content, finish_reason, usage } = getResponseData();
+
+  return { answer: content, functionCalls, finish_reason, usage };
 }

View File

@@ -220,7 +220,8 @@ export const runToolWithPromptCall = async (
   const max_tokens = computedMaxToken({
     model: toolModel,
-    maxToken
+    maxToken,
+    min: 100
   });
   const filterMessages = await filterGPTMessageByMaxContext({
     messages,
@@ -592,28 +593,22 @@ async function streamResponse({
   let startResponseWrite = false;
   let answer = '';
-  let reasoning = '';
-  let finish_reason: CompletionFinishReason = null;
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
       break;
     }

-    const { reasoningContent, content, responseContent, finishReason } = parsePart({
+    const { reasoningContent, content, responseContent } = parsePart({
       part,
       parseThinkTag: aiChatReasoning,
       retainDatasetCite
     });
-    finish_reason = finish_reason || finishReason;
     answer += content;
-    reasoning += reasoningContent;

     // Reasoning response
     if (aiChatReasoning && reasoningContent) {
@@ -658,7 +653,9 @@ async function streamResponse({
     }
   }

-  return { answer, reasoning, finish_reason, usage };
+  const { reasoningContent, content, finish_reason, usage } = getResponseData();
+
+  return { answer: content, reasoning: reasoningContent, finish_reason, usage };
 }

 const parseAnswer = (

View File

@@ -7,17 +7,13 @@ import {
   type ChatCompletionToolMessageParam,
   type ChatCompletionMessageParam,
   type ChatCompletionTool,
-  type ChatCompletionAssistantMessageParam,
   type CompletionFinishReason
 } from '@fastgpt/global/core/ai/type';
 import { type NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
 import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
 import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
-import {
-  ChatCompletionRequestMessageRoleEnum,
-  getLLMDefaultUsage
-} from '@fastgpt/global/core/ai/constants';
+import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
 import { dispatchWorkFlow } from '../../index';
 import {
   type DispatchToolModuleProps,
@@ -254,7 +250,8 @@ export const runToolWithToolChoice = async (
   const max_tokens = computedMaxToken({
     model: toolModel,
-    maxToken
+    maxToken,
+    min: 100
   });

   // Filter histories by maxToken
@@ -319,97 +316,101 @@ export const runToolWithToolChoice = async (
    }
  });

  let { reasoningContent, answer, toolCalls, finish_reason, inputTokens, outputTokens } =
    await (async () => {
      if (isStreamResponse) {
        if (!res || res.closed) {
          return {
            reasoningContent: '',
            answer: '',
            toolCalls: [],
            finish_reason: 'close' as const,
            inputTokens: 0,
            outputTokens: 0
          };
        }

        const result = await streamResponse({
          res,
          workflowStreamResponse,
          toolNodes,
          stream: aiResponse,
          aiChatReasoning,
          retainDatasetCite
        });

        return {
          reasoningContent: result.reasoningContent,
          answer: result.answer,
          toolCalls: result.toolCalls,
          finish_reason: result.finish_reason,
          inputTokens: result.usage.prompt_tokens,
          outputTokens: result.usage.completion_tokens
        };
      } else {
        const result = aiResponse as ChatCompletion;
        const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
        const calls = result.choices?.[0]?.message?.tool_calls || [];
        const answer = result.choices?.[0]?.message?.content || '';
        // @ts-ignore
        const reasoningContent = result.choices?.[0]?.message?.reasoning_content || '';
        const usage = result.usage;

        if (aiChatReasoning && reasoningContent) {
          workflowStreamResponse?.({
            event: SseResponseEventEnum.fastAnswer,
            data: textAdaptGptResponse({
              reasoning_content: removeDatasetCiteText(reasoningContent, retainDatasetCite)
            })
          });
        }

        // Format toolCalls
        const toolCalls = calls.map((tool) => {
          const toolNode = toolNodes.find((item) => item.nodeId === tool.function?.name);

          // For models that do not support stream mode, send a supplementary tool-call response to the client here
          workflowStreamResponse?.({
            event: SseResponseEventEnum.toolCall,
            data: {
              tool: {
                id: tool.id,
                toolName: toolNode?.name || '',
                toolAvatar: toolNode?.avatar || '',
                functionName: tool.function.name,
                params: tool.function?.arguments ?? '',
                response: ''
              }
            }
          });

          return {
            ...tool,
            toolName: toolNode?.name || '',
            toolAvatar: toolNode?.avatar || ''
          };
        });

        if (answer) {
          workflowStreamResponse?.({
            event: SseResponseEventEnum.fastAnswer,
            data: textAdaptGptResponse({
              text: removeDatasetCiteText(answer, retainDatasetCite)
            })
          });
        }

        return {
          reasoningContent: (reasoningContent as string) || '',
          answer,
          toolCalls: toolCalls,
          finish_reason,
          inputTokens: usage?.prompt_tokens,
          outputTokens: usage?.completion_tokens
        };
      }
    })();

  if (!answer && !reasoningContent && toolCalls.length === 0) {
    return Promise.reject(getEmptyResponseTip());
  }
@@ -501,12 +502,13 @@ export const runToolWithToolChoice = async (
   if (toolCalls.length > 0) {
     // Run the tool, combine its results, and perform another round of AI calls
-    const assistantToolMsgParams: ChatCompletionAssistantMessageParam[] = [
-      ...(answer
+    const assistantToolMsgParams: ChatCompletionMessageParam[] = [
+      ...(answer || reasoningContent
         ? [
             {
               role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
-              content: answer
+              content: answer,
+              reasoning_text: reasoningContent
             }
           ]
         : []),
@@ -627,9 +629,10 @@ export const runToolWithToolChoice = async (
     );
   } else {
     // No tool is invoked, indicating that the process is over
-    const gptAssistantResponse: ChatCompletionAssistantMessageParam = {
+    const gptAssistantResponse: ChatCompletionMessageParam = {
       role: ChatCompletionRequestMessageRoleEnum.Assistant,
-      content: answer
+      content: answer,
+      reasoning_text: reasoningContent
     };
     const completeMessages = filterMessages.concat(gptAssistantResponse);
     inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools));
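These two hunks are the fix for changelog item 3: the assistant message appended to the conversation now carries the reasoning output next to the answer, so it survives later tool rounds and persistence. A shape sketch of the resulting message (reasoning_text is the field used above; whether a given provider consumes it downstream is model-specific):

// Assistant message persisted in tool-call mode (shape sketch).
const assistantMessage = {
  role: ChatCompletionRequestMessageRoleEnum.Assistant,
  content: answer,                  // visible answer text
  reasoning_text: reasoningContent  // previously dropped, now kept with the message
};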
@@ -671,34 +674,23 @@ async function streamResponse({
     readStream: stream
   });

-  let textAnswer = '';
   let callingTool: { name: string; arguments: string } | null = null;
   let toolCalls: ChatCompletionMessageToolCall[] = [];
-  let finish_reason: CompletionFinishReason = null;
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
       break;
     }

-    const {
-      reasoningContent,
-      content: toolChoiceContent,
-      responseContent,
-      finishReason
-    } = parsePart({
+    const { reasoningContent, responseContent } = parsePart({
       part,
       parseThinkTag: true,
       retainDatasetCite
     });
-    textAnswer += toolChoiceContent;
-    finish_reason = finishReason || finish_reason;

     const responseChoice = part.choices?.[0]?.delta;
@@ -800,5 +792,13 @@ async function streamResponse({
     }
   }

-  return { answer: textAnswer, toolCalls: toolCalls.filter(Boolean), finish_reason, usage };
+  const { reasoningContent, content, finish_reason, usage } = getResponseData();
+
+  return {
+    reasoningContent,
+    answer: content,
+    toolCalls: toolCalls.filter(Boolean),
+    finish_reason,
+    usage
+  };
 }

View File

@@ -556,30 +556,21 @@ async function streamResponse({
     res,
     readStream: stream
   });

-  let answer = '';
-  let reasoning = '';
-  let finish_reason: CompletionFinishReason = null;
-  let usage: CompletionUsage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
       break;
     }

-    const { reasoningContent, content, responseContent, finishReason } = parsePart({
+    const { reasoningContent, responseContent } = parsePart({
       part,
       parseThinkTag,
       retainDatasetCite
     });
-    finish_reason = finish_reason || finishReason;
-    answer += content;
-    reasoning += reasoningContent;

     if (aiChatReasoning && reasoningContent) {
       workflowStreamResponse?.({
@@ -602,5 +593,7 @@ async function streamResponse({
     }
   }

+  const { reasoningContent: reasoning, content: answer, finish_reason, usage } = getResponseData();
+
   return { answer, reasoning, finish_reason, usage };
 }