Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-21 11:43:56 +00:00)
fix: stream response (#4853)
@@ -19,4 +19,6 @@ weight: 790

 ## 🐛 Fixes

-1. Score ordering was incorrect when running full-text search across multiple knowledge bases
+1. Score ordering was incorrect when running full-text search across multiple knowledge bases.
+2. Stream responses could capture an incorrect finish_reason.
+3. Tool-call mode did not save the reasoning output.
@@ -18,15 +18,17 @@ import json5 from 'json5';
 */
 export const computedMaxToken = ({
   maxToken,
-  model
+  model,
+  min
 }: {
   maxToken?: number;
   model: LLMModelItemType;
+  min?: number;
 }) => {
   if (maxToken === undefined) return;

   maxToken = Math.min(maxToken, model.maxResponse);
-  return maxToken;
+  return Math.max(maxToken, min || 0);
 };

 // FastGPT temperature range: [0,10], ai temperature:[0,2],{0,1]……
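The added `min` floor matters for the tool-call paths later in this commit, which now pass `min: 100` so the response budget can never be clamped down to something unusably small. A standalone sketch of the resulting clamp behavior (an illustrative re-implementation, not the exported function):

// Illustrative re-implementation of the clamp above (not the FastGPT export).
const clampMaxToken = (maxToken: number | undefined, maxResponse: number, min?: number) => {
  if (maxToken === undefined) return;
  return Math.max(Math.min(maxToken, maxResponse), min || 0);
};

clampMaxToken(8000, 4096); // 4096 (capped by the model's maxResponse)
clampMaxToken(50, 4096, 100); // 100 (raised to the floor so tool calls keep headroom)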
@@ -178,7 +180,7 @@ export const llmStreamResponseToAnswerText = async (
     }
   }
   return {
-    text: parseReasoningContent(answer)[1],
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
     usage,
     toolCalls
   };
@@ -192,8 +194,9 @@ export const llmUnStreamResponseToAnswerText = async (
 }> => {
   const answer = response.choices?.[0]?.message?.content || '';
   const toolCalls = response.choices?.[0]?.message?.tool_calls;
+
   return {
-    text: answer,
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
     usage: response.usage,
     toolCalls
   };
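Both the stream and non-stream answer paths now pass the parsed answer through `removeDatasetCiteText(text, false)`. The helper itself is outside this diff; judging from the `[Object](CITE)` comment further down, a hypothetical equivalent would strip citation markers whenever the retain flag is false:

// Hypothetical stand-in for removeDatasetCiteText; the real helper lives elsewhere in FastGPT.
const removeDatasetCiteTextSketch = (text: string, retainDatasetCite: boolean) =>
  retainDatasetCite ? text : text.replace(/\[([^\]]*)\]\(CITE\)/g, '');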
@@ -240,6 +243,12 @@ export const parseLLMStreamResponse = () => {
   let citeBuffer = '';
   const maxCiteBufferLength = 32; // The full [Object](CITE) marker is 32 characters long

+  // Buffer
+  let buffer_finishReason: CompletionFinishReason = null;
+  let buffer_usage: CompletionUsage = getLLMDefaultUsage();
+  let buffer_reasoningContent = '';
+  let buffer_content = '';
+
   /*
     parseThinkTag - only controls whether <think></think> is actively parsed; if the API already parsed it, it is not parsed again.
     retainDatasetCite -
@@ -257,6 +266,7 @@ export const parseLLMStreamResponse = () => {
       };
       finish_reason?: CompletionFinishReason;
     }[];
+    usage?: CompletionUsage;
   };
   parseThinkTag?: boolean;
   retainDatasetCite?: boolean;
@@ -266,72 +276,71 @@ export const parseLLMStreamResponse = () => {
     responseContent: string;
     finishReason: CompletionFinishReason;
   } => {
-    const finishReason = part.choices?.[0]?.finish_reason || null;
-    const content = part.choices?.[0]?.delta?.content || '';
-    // @ts-ignore
-    const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
-    const isStreamEnd = !!finishReason;
-
-    // Parse think
-    const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } = (() => {
-      if (reasoningContent || !parseThinkTag) {
-        isInThinkTag = false;
-        return { reasoningContent, content };
-      }
-
-      if (!content) {
-        return {
-          reasoningContent: '',
-          content: ''
-        };
-      }
-
-      // Not in a think tag, or reasoningContent was already parsed by the API: return reasoningContent and content as-is
-      if (isInThinkTag === false) {
-        return {
-          reasoningContent: '',
-          content
-        };
-      }
-
-      // Detect whether the data starts with a think tag
-      if (isInThinkTag === undefined) {
-        // Parse content think and answer
-        startTagBuffer += content;
-        // Too little content so far: skip parsing for now
-        if (startTagBuffer.length < thinkStartChars.length) {
-          if (isStreamEnd) {
-            const tmpContent = startTagBuffer;
-            startTagBuffer = '';
-            return {
-              reasoningContent: '',
-              content: tmpContent
-            };
-          }
-          return {
-            reasoningContent: '',
-            content: ''
-          };
-        }
-
-        if (startTagBuffer.startsWith(thinkStartChars)) {
-          isInThinkTag = true;
-          return {
-            reasoningContent: startTagBuffer.slice(thinkStartChars.length),
-            content: ''
-          };
-        }
-
-        // No think tag matched: treat as not in a think tag and return the buffered text as content
-        isInThinkTag = false;
-        return {
-          reasoningContent: '',
-          content: startTagBuffer
-        };
-      }
-
-      // Confirmed think-tag content: start returning think content and watch for </think> as it streams
-      /*
+    const data = (() => {
+      buffer_usage = part.usage || buffer_usage;
+
+      const finishReason = part.choices?.[0]?.finish_reason || null;
+      buffer_finishReason = finishReason || buffer_finishReason;
+
+      const content = part.choices?.[0]?.delta?.content || '';
+      // @ts-ignore
+      const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
+      const isStreamEnd = !!buffer_finishReason;
+
+      // Parse think
+      const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } =
+        (() => {
+          if (reasoningContent || !parseThinkTag) {
+            isInThinkTag = false;
+            return { reasoningContent, content };
+          }
+
+          // Not in a think tag, or reasoningContent was already parsed by the API: return reasoningContent and content as-is
+          if (isInThinkTag === false) {
+            return {
+              reasoningContent: '',
+              content
+            };
+          }
+
+          // Detect whether the data starts with a think tag
+          if (isInThinkTag === undefined) {
+            // Parse content think and answer
+            startTagBuffer += content;
+            // Too little content so far: skip parsing for now
+            if (startTagBuffer.length < thinkStartChars.length) {
+              if (isStreamEnd) {
+                const tmpContent = startTagBuffer;
+                startTagBuffer = '';
+                return {
+                  reasoningContent: '',
+                  content: tmpContent
+                };
+              }
+              return {
+                reasoningContent: '',
+                content: ''
+              };
+            }
+
+            if (startTagBuffer.startsWith(thinkStartChars)) {
+              isInThinkTag = true;
+              return {
+                reasoningContent: startTagBuffer.slice(thinkStartChars.length),
+                content: ''
+              };
+            }
+
+            // No think tag matched: treat as not in a think tag and return the buffered text as content
+            isInThinkTag = false;
+            return {
+              reasoningContent: '',
+              content: startTagBuffer
+            };
+          }
+
+          // Confirmed think-tag content: start returning think content and watch for </think> as it streams
+          /*
             How </think> is detected:
            buffer everything that might be </think> until a full </think> tag is seen or the buffer grows past the tag length.
            The returned content covers these cases:
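The hunk above wraps the per-chunk work in a `data` IIFE so that `usage` and `finish_reason` are captured on every chunk, not only the one being returned; `isStreamEnd` now keys off the accumulated `buffer_finishReason`, which is the heart of the finish_reason fix. The partial `</think>` detection the block preserves can be shown in isolation (a simplified sketch, not the exported parser):

// Simplified sketch of the streaming </think> detection used above.
const thinkEnd = '</think>';
let pending = ''; // suspected partial end tag (endTagBuffer in the real code)

const feed = (chunk: string): { reasoning: string; answer: string } => {
  const text = pending + chunk;
  const hit = text.indexOf(thinkEnd);
  if (hit >= 0) {
    pending = '';
    return { reasoning: text.slice(0, hit), answer: text.slice(hit + thinkEnd.length) };
  }
  // Keep the longest suffix that could still grow into '</think>'.
  for (let i = Math.min(thinkEnd.length - 1, text.length); i > 0; i--) {
    if (thinkEnd.startsWith(text.slice(-i))) {
      pending = text.slice(-i);
      return { reasoning: text.slice(0, -i), answer: '' };
    }
  }
  pending = '';
  return { reasoning: text, answer: '' };
};

// feed('abc</thi') -> { reasoning: 'abc', answer: '' }, then
// feed('nk>done') -> { reasoning: '', answer: 'done' }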
@@ -342,124 +351,145 @@ export const parseLLMStreamResponse = () => {
             </think>abc - full hit on the end tag
             k>abc - partial hit on the end tag
           */
           // endTagBuffer records content suspected to be the end tag
           if (endTagBuffer) {
             endTagBuffer += content;
             if (endTagBuffer.includes(thinkEndChars)) {
               isInThinkTag = false;
               const answer = endTagBuffer.slice(thinkEndChars.length);
               return {
                 reasoningContent: '',
                 content: answer
               };
             } else if (endTagBuffer.length >= thinkEndChars.length) {
               // The buffer outgrew the end tag without matching </think>: this guess failed, still in the think phase.
               const tmp = endTagBuffer;
               endTagBuffer = '';
               return {
                 reasoningContent: tmp,
                 content: ''
               };
             }
-            return {
-              reasoningContent: '',
-              content: ''
-            };
-          } else if (content.includes(thinkEndChars)) {
-            // Full </think> hit in the content: finish immediately
-            isInThinkTag = false;
-            const [think, answer] = content.split(thinkEndChars);
-            return {
-              reasoningContent: think,
-              content: answer
-            };
-          } else {
-            // No buffer and no </think> hit yet: start probing for a partial </think>.
-            for (let i = 1; i < thinkEndChars.length; i++) {
-              const partialEndTag = thinkEndChars.slice(0, i);
-              // Partial hit on the end tag
-              if (content.endsWith(partialEndTag)) {
-                const think = content.slice(0, -partialEndTag.length);
-                endTagBuffer += partialEndTag;
             return {
-              reasoningContent: think,
+              reasoningContent: '',
               content: ''
             };
+          } else if (content.includes(thinkEndChars)) {
+            // Full </think> hit in the content: finish immediately
+            isInThinkTag = false;
+            const [think, answer] = content.split(thinkEndChars);
+            return {
+              reasoningContent: think,
+              content: answer
+            };
+          } else {
+            // No buffer and no </think> hit yet: start probing for a partial </think>.
+            for (let i = 1; i < thinkEndChars.length; i++) {
+              const partialEndTag = thinkEndChars.slice(0, i);
+              // Partial hit on the end tag
+              if (content.endsWith(partialEndTag)) {
+                const think = content.slice(0, -partialEndTag.length);
+                endTagBuffer += partialEndTag;
+                return {
+                  reasoningContent: think,
+                  content: ''
+                };
+              }
+            }
           }
-          }

+          // No hit on the end tag at all: still in the think phase.
+          return {
+            reasoningContent: content,
+            content: ''
+          };
+        })();
+
+        // Parse dataset cite
+        if (retainDatasetCite) {
+          return {
+            reasoningContent: parsedThinkReasoningContent,
+            content: parsedThinkContent,
+            responseContent: parsedThinkContent,
+            finishReason: buffer_finishReason
+          };
         }

-      // No hit on the end tag at all: still in the think phase.
-      return {
-        reasoningContent: content,
-        content: ''
-      };
-    })();
+        // Buffer strings that contain [ and flush them in one go once maxCiteBufferLength is exceeded
+        const parseCite = (text: string) => {
+          // At stream end, return all remaining content
+          if (isStreamEnd) {
+            const content = citeBuffer + text;
+            return {
+              content: removeDatasetCiteText(content, false)
+            };
+          }

+          // New content contains [: initialize the buffer
+          if (text.includes('[')) {
+            const index = text.indexOf('[');
+            const beforeContent = citeBuffer + text.slice(0, index);
+            citeBuffer = text.slice(index);

+            // beforeContent may be a plain string or one that contains [
+            return {
+              content: removeDatasetCiteText(beforeContent, false)
+            };
+          }
+          // Already inside the cite buffer: check whether the flush condition is met
+          else if (citeBuffer) {
+            citeBuffer += text;

+            // Check whether the buffer has reached the full quote length, or the stream has already ended
+            if (citeBuffer.length >= maxCiteBufferLength) {
+              const content = removeDatasetCiteText(citeBuffer, false);
+              citeBuffer = '';

+              return {
+                content
+              };
+            } else {
+              // Hold the content back for now
+              return { content: '' };
+            }
+          }

+          return {
+            content: text
+          };
+        };
+        const { content: pasedCiteContent } = parseCite(parsedThinkContent);

-    // Parse dataset cite
-    if (retainDatasetCite) {
         return {
           reasoningContent: parsedThinkReasoningContent,
           content: parsedThinkContent,
-        responseContent: parsedThinkContent,
-        finishReason
+          responseContent: pasedCiteContent,
+          finishReason: buffer_finishReason
         };
-    }
+      })();

-    // Buffer strings that contain [ and flush them in one go once maxCiteBufferLength is exceeded
-    const parseCite = (text: string) => {
-      // At stream end, return all remaining content
-      if (isStreamEnd) {
-        const content = citeBuffer + text;
-        return {
-          content: removeDatasetCiteText(content, false)
-        };
-      }
+      buffer_reasoningContent += data.reasoningContent;
+      buffer_content += data.content;

-      // New content contains [: initialize the buffer
-      if (text.includes('[')) {
-        const index = text.indexOf('[');
-        const beforeContent = citeBuffer + text.slice(0, index);
-        citeBuffer = text.slice(index);
-
-        // beforeContent may be a plain string or one that contains [
-        return {
-          content: removeDatasetCiteText(beforeContent, false)
-        };
-      }
-      // Already inside the cite buffer: check whether the flush condition is met
-      else if (citeBuffer) {
-        citeBuffer += text;
-
-        // Check whether the buffer has reached the full quote length, or the stream has already ended
-        if (citeBuffer.length >= maxCiteBufferLength) {
-          const content = removeDatasetCiteText(citeBuffer, false);
-          citeBuffer = '';
-
-          return {
-            content
-          };
-        } else {
-          // Hold the content back for now
-          return { content: '' };
-        }
-      }
-
-      return {
-        content: text
-      };
-    };
-    const { content: pasedCiteContent } = parseCite(parsedThinkContent);
+      return data;
+    };

+    const getResponseData = () => {
       return {
-      reasoningContent: parsedThinkReasoningContent,
-      content: parsedThinkContent,
-      responseContent: pasedCiteContent,
-      finishReason
+        finish_reason: buffer_finishReason,
+        usage: buffer_usage,
+        reasoningContent: buffer_reasoningContent,
+        content: buffer_content
       };
     };

+    const updateFinishReason = (finishReason: CompletionFinishReason) => {
+      buffer_finishReason = finishReason;
+    };
+
   return {
-    parsePart
+    parsePart,
+    getResponseData,
+    updateFinishReason
   };
 };
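The `parseCite` helper holds back any chunk tail starting at `[` until it has enough characters to classify it (at most `maxCiteBufferLength`), which keeps half-streamed `[id](CITE)` fragments from leaking to the client. The same buffering logic in isolation (a simplified sketch with an injected stripper, not the exported code):

// Simplified sketch of the cite buffering above; `strip` stands in for removeDatasetCiteText.
const MAX_CITE_LEN = 32; // full '[Object](CITE)' marker length in the real code
let citeBuf = '';

const feedCite = (text: string, strip: (s: string) => string, ended: boolean): string => {
  if (ended) {
    const out = strip(citeBuf + text); // stream over: flush everything that is left
    citeBuf = '';
    return out;
  }
  if (text.includes('[')) {
    const i = text.indexOf('[');
    const before = citeBuf + text.slice(0, i); // safe prefix can be emitted now
    citeBuf = text.slice(i); // start buffering from the '['
    return strip(before);
  }
  if (citeBuf) {
    citeBuf += text;
    if (citeBuf.length >= MAX_CITE_LEN) {
      const out = strip(citeBuf); // long enough to classify: flush in one go
      citeBuf = '';
      return out;
    }
    return ''; // still ambiguous: hold it back
  }
  return text;
};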
@@ -1,13 +1,14 @@
 import { createChatCompletion } from '../../../../ai/config';
 import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
-import {
-  type ChatCompletion,
-  type StreamChatType,
-  type ChatCompletionMessageParam,
-  type ChatCompletionCreateParams,
-  type ChatCompletionMessageFunctionCall,
-  type ChatCompletionFunctionMessageParam,
-  type ChatCompletionAssistantMessageParam
+import type {
+  ChatCompletion,
+  StreamChatType,
+  ChatCompletionMessageParam,
+  ChatCompletionCreateParams,
+  ChatCompletionMessageFunctionCall,
+  ChatCompletionFunctionMessageParam,
+  ChatCompletionAssistantMessageParam,
+  CompletionFinishReason
 } from '@fastgpt/global/core/ai/type.d';
 import { type NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
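The import block is also tightened: a single `import type` declaration replaces the per-member `type` modifiers while `CompletionFinishReason` joins the list. The two forms are equivalent for type-only members and both are erased from the emitted JavaScript; `import type` simply enforces that for the whole statement:

// Equivalent, both fully erased at compile time:
import { type ChatCompletion } from '@fastgpt/global/core/ai/type.d';
import type { StreamChatType } from '@fastgpt/global/core/ai/type.d';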
@@ -259,14 +260,15 @@ export const runToolWithFunctionCall = async (
     }
   });

-  let { answer, functionCalls, inputTokens, outputTokens } = await (async () => {
+  let { answer, functionCalls, inputTokens, outputTokens, finish_reason } = await (async () => {
     if (isStreamResponse) {
       if (!res || res.closed) {
         return {
           answer: '',
           functionCalls: [],
           inputTokens: 0,
-          outputTokens: 0
+          outputTokens: 0,
+          finish_reason: 'close' as const
         };
       }
       const result = await streamResponse({
@@ -281,10 +283,12 @@ export const runToolWithFunctionCall = async (
         answer: result.answer,
         functionCalls: result.functionCalls,
         inputTokens: result.usage.prompt_tokens,
-        outputTokens: result.usage.completion_tokens
+        outputTokens: result.usage.completion_tokens,
+        finish_reason: result.finish_reason
       };
     } else {
       const result = aiResponse as ChatCompletion;
+      const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
       const function_call = result.choices?.[0]?.message?.function_call;
       const usage = result.usage;

@@ -315,7 +319,8 @@ export const runToolWithFunctionCall = async (
         answer,
         functionCalls: toolCalls,
         inputTokens: usage?.prompt_tokens,
-        outputTokens: usage?.completion_tokens
+        outputTokens: usage?.completion_tokens,
+        finish_reason
       };
     }
   })();
@@ -481,7 +486,8 @@ export const runToolWithFunctionCall = async (
       completeMessages,
       assistantResponses: toolNodeAssistants,
       runTimes,
-      toolWorkflowInteractiveResponse
+      toolWorkflowInteractiveResponse,
+      finish_reason
     };
   }

@@ -495,7 +501,8 @@ export const runToolWithFunctionCall = async (
         toolNodeInputTokens,
         toolNodeOutputTokens,
         assistantResponses: toolNodeAssistants,
-        runTimes
+        runTimes,
+        finish_reason
       }
     );
   } else {
@@ -523,7 +530,8 @@ export const runToolWithFunctionCall = async (
         : outputTokens,
       completeMessages,
       assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
-      runTimes: (response?.runTimes || 0) + 1
+      runTimes: (response?.runTimes || 0) + 1,
+      finish_reason
     };
   }
 };
@@ -546,28 +554,25 @@ async function streamResponse({
     readStream: stream
   });

-  let textAnswer = '';
   let functionCalls: ChatCompletionMessageFunctionCall[] = [];
   let functionId = getNanoid();
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
+      updateFinishReason('close');
       break;
     }

-    const { content: toolChoiceContent, responseContent } = parsePart({
+    const { responseContent } = parsePart({
       part,
       parseThinkTag: false,
       retainDatasetCite
     });

     const responseChoice = part.choices?.[0]?.delta;
-    textAnswer += toolChoiceContent;

     if (responseContent) {
       workflowStreamResponse?.({
@@ -577,7 +582,7 @@ async function streamResponse({
           text: responseContent
         })
       });
-    } else if (responseChoice.function_call) {
+    } else if (responseChoice?.function_call) {
       const functionCall: {
         arguments?: string;
         name?: string;
@@ -640,5 +645,7 @@ async function streamResponse({
     }
   }

-  return { answer: textAnswer, functionCalls, usage };
+  const { content, finish_reason, usage } = getResponseData();
+
+  return { answer: content, functionCalls, finish_reason, usage };
 }
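Every `streamResponse` helper in this commit follows the same migration: stop accumulating `textAnswer`/`usage`/`finish_reason` locals per chunk and instead read the totals from the parser once the loop ends. A minimal consumer under that pattern (a hypothetical wrapper; assumes `parseLLMStreamResponse` is imported from the utils module shown earlier, with the stream type elided):

// Hypothetical consumer mirroring the streamResponse changes above.
// Assumes: import { parseLLMStreamResponse } from the ai utils module in this diff.
async function drainStream(
  stream: AsyncIterable<any>, // a StreamChatType in the real code
  isClosed: () => boolean,
  write: (text: string) => void
) {
  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

  for await (const part of stream) {
    if (isClosed()) {
      updateFinishReason('close'); // recorded in the parser, survives to getResponseData()
      break;
    }
    const { responseContent } = parsePart({ part, parseThinkTag: false, retainDatasetCite: false });
    if (responseContent) write(responseContent);
  }

  // Accumulated content, reasoning, usage and the final finish_reason in one place.
  return getResponseData();
}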
@@ -220,7 +220,8 @@ export const runToolWithPromptCall = async (

   const max_tokens = computedMaxToken({
     model: toolModel,
-    maxToken
+    maxToken,
+    min: 100
   });
   const filterMessages = await filterGPTMessageByMaxContext({
     messages,
@@ -592,28 +593,22 @@ async function streamResponse({

   let startResponseWrite = false;
   let answer = '';
-  let reasoning = '';
-  let finish_reason: CompletionFinishReason = null;
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
       break;
     }

-    const { reasoningContent, content, responseContent, finishReason } = parsePart({
+    const { reasoningContent, content, responseContent } = parsePart({
       part,
       parseThinkTag: aiChatReasoning,
       retainDatasetCite
     });
-    finish_reason = finish_reason || finishReason;
     answer += content;
-    reasoning += reasoningContent;

     // Reasoning response
     if (aiChatReasoning && reasoningContent) {
@@ -658,7 +653,9 @@ async function streamResponse({
     }
   }

-  return { answer, reasoning, finish_reason, usage };
+  const { reasoningContent, content, finish_reason, usage } = getResponseData();
+
+  return { answer: content, reasoning: reasoningContent, finish_reason, usage };
 }

 const parseAnswer = (
@@ -7,17 +7,13 @@ import {
   type ChatCompletionToolMessageParam,
   type ChatCompletionMessageParam,
   type ChatCompletionTool,
-  type ChatCompletionAssistantMessageParam,
   type CompletionFinishReason
 } from '@fastgpt/global/core/ai/type';
 import { type NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
 import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
 import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
-import {
-  ChatCompletionRequestMessageRoleEnum,
-  getLLMDefaultUsage
-} from '@fastgpt/global/core/ai/constants';
+import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
 import { dispatchWorkFlow } from '../../index';
 import {
   type DispatchToolModuleProps,
@@ -254,7 +250,8 @@ export const runToolWithToolChoice = async (

   const max_tokens = computedMaxToken({
     model: toolModel,
-    maxToken
+    maxToken,
+    min: 100
   });

   // Filter histories by maxToken
@@ -319,97 +316,101 @@ export const runToolWithToolChoice = async (
     }
   });

-  let { answer, toolCalls, finish_reason, inputTokens, outputTokens } = await (async () => {
-    if (isStreamResponse) {
-      if (!res || res.closed) {
-        return {
-          answer: '',
-          toolCalls: [],
-          finish_reason: 'close' as const,
-          inputTokens: 0,
-          outputTokens: 0
-        };
-      }
+  let { reasoningContent, answer, toolCalls, finish_reason, inputTokens, outputTokens } =
+    await (async () => {
+      if (isStreamResponse) {
+        if (!res || res.closed) {
+          return {
+            reasoningContent: '',
+            answer: '',
+            toolCalls: [],
+            finish_reason: 'close' as const,
+            inputTokens: 0,
+            outputTokens: 0
+          };
+        }

         const result = await streamResponse({
           res,
           workflowStreamResponse,
           toolNodes,
           stream: aiResponse,
           aiChatReasoning,
           retainDatasetCite
-      });
-
-      return {
-        answer: result.answer,
-        toolCalls: result.toolCalls,
-        finish_reason: result.finish_reason,
-        inputTokens: result.usage.prompt_tokens,
-        outputTokens: result.usage.completion_tokens
-      };
-    } else {
-      const result = aiResponse as ChatCompletion;
-      const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
-      const calls = result.choices?.[0]?.message?.tool_calls || [];
-      const answer = result.choices?.[0]?.message?.content || '';
-      // @ts-ignore
-      const reasoningContent = result.choices?.[0]?.message?.reasoning_content || '';
-      const usage = result.usage;
-
-      if (aiChatReasoning && reasoningContent) {
-        workflowStreamResponse?.({
-          event: SseResponseEventEnum.fastAnswer,
-          data: textAdaptGptResponse({
-            reasoning_content: removeDatasetCiteText(reasoningContent, retainDatasetCite)
-          })
         });
-      }

-      // Format toolCalls
-      const toolCalls = calls.map((tool) => {
-        const toolNode = toolNodes.find((item) => item.nodeId === tool.function?.name);
+        return {
+          reasoningContent: result.reasoningContent,
+          answer: result.answer,
+          toolCalls: result.toolCalls,
+          finish_reason: result.finish_reason,
+          inputTokens: result.usage.prompt_tokens,
+          outputTokens: result.usage.completion_tokens
+        };
+      } else {
+        const result = aiResponse as ChatCompletion;
+        const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
+        const calls = result.choices?.[0]?.message?.tool_calls || [];
+        const answer = result.choices?.[0]?.message?.content || '';
+        // @ts-ignore
+        const reasoningContent = result.choices?.[0]?.message?.reasoning_content || '';
+        const usage = result.usage;

-        // For models that do not support stream mode, a response has to be pushed to the client here
+        if (aiChatReasoning && reasoningContent) {
           workflowStreamResponse?.({
-          event: SseResponseEventEnum.toolCall,
-          data: {
-            tool: {
-              id: tool.id,
-              toolName: toolNode?.name || '',
-              toolAvatar: toolNode?.avatar || '',
-              functionName: tool.function.name,
-              params: tool.function?.arguments ?? '',
-              response: ''
+            event: SseResponseEventEnum.fastAnswer,
+            data: textAdaptGptResponse({
+              reasoning_content: removeDatasetCiteText(reasoningContent, retainDatasetCite)
+            })
+          });
+        }
+
+        // Format toolCalls
+        const toolCalls = calls.map((tool) => {
+          const toolNode = toolNodes.find((item) => item.nodeId === tool.function?.name);
+
+          // For models that do not support stream mode, a response has to be pushed to the client here
+          workflowStreamResponse?.({
+            event: SseResponseEventEnum.toolCall,
+            data: {
+              tool: {
+                id: tool.id,
+                toolName: toolNode?.name || '',
+                toolAvatar: toolNode?.avatar || '',
+                functionName: tool.function.name,
+                params: tool.function?.arguments ?? '',
+                response: ''
+              }
             }
-          }
-        }
-      });
+          });

+          return {
+            ...tool,
+            toolName: toolNode?.name || '',
+            toolAvatar: toolNode?.avatar || ''
+          };
         });

+        if (answer) {
+          workflowStreamResponse?.({
+            event: SseResponseEventEnum.fastAnswer,
+            data: textAdaptGptResponse({
+              text: removeDatasetCiteText(answer, retainDatasetCite)
+            })
+          });
+        }
+
         return {
-        ...tool,
-        toolName: toolNode?.name || '',
-        toolAvatar: toolNode?.avatar || ''
+          reasoningContent: (reasoningContent as string) || '',
+          answer,
+          toolCalls: toolCalls,
+          finish_reason,
+          inputTokens: usage?.prompt_tokens,
+          outputTokens: usage?.completion_tokens
         };
-    });

-    if (answer) {
-      workflowStreamResponse?.({
-        event: SseResponseEventEnum.fastAnswer,
-        data: textAdaptGptResponse({
-          text: removeDatasetCiteText(answer, retainDatasetCite)
-        })
-      });
       }
+    })();

-      return {
-        answer,
-        toolCalls: toolCalls,
-        finish_reason,
-        inputTokens: usage?.prompt_tokens,
-        outputTokens: usage?.completion_tokens
-      };
-    }
-  })();
-  if (!answer && toolCalls.length === 0) {
+  if (!answer && !reasoningContent && toolCalls.length === 0) {
     return Promise.reject(getEmptyResponseTip());
   }

@@ -501,12 +502,13 @@ export const runToolWithToolChoice = async (

   if (toolCalls.length > 0) {
     // Run the tool, combine its results, and perform another round of AI calls
-    const assistantToolMsgParams: ChatCompletionAssistantMessageParam[] = [
-      ...(answer
+    const assistantToolMsgParams: ChatCompletionMessageParam[] = [
+      ...(answer || reasoningContent
         ? [
             {
               role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
-              content: answer
+              content: answer,
+              reasoning_text: reasoningContent
             }
           ]
         : []),
@@ -627,9 +629,10 @@ export const runToolWithToolChoice = async (
     );
   } else {
     // No tool is invoked, indicating that the process is over
-    const gptAssistantResponse: ChatCompletionAssistantMessageParam = {
+    const gptAssistantResponse: ChatCompletionMessageParam = {
       role: ChatCompletionRequestMessageRoleEnum.Assistant,
-      content: answer
+      content: answer,
+      reasoning_text: reasoningContent
     };
     const completeMessages = filterMessages.concat(gptAssistantResponse);
     inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools));
@@ -671,34 +674,23 @@ async function streamResponse({
     readStream: stream
   });

-  let textAnswer = '';
   let callingTool: { name: string; arguments: string } | null = null;
   let toolCalls: ChatCompletionMessageToolCall[] = [];
-  let finish_reason: CompletionFinishReason = null;
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
       break;
     }

-    const {
-      reasoningContent,
-      content: toolChoiceContent,
-      responseContent,
-      finishReason
-    } = parsePart({
+    const { reasoningContent, responseContent } = parsePart({
       part,
       parseThinkTag: true,
       retainDatasetCite
     });
-    textAnswer += toolChoiceContent;
-    finish_reason = finishReason || finish_reason;

     const responseChoice = part.choices?.[0]?.delta;

@@ -800,5 +792,13 @@ async function streamResponse({
     }
   }

-  return { answer: textAnswer, toolCalls: toolCalls.filter(Boolean), finish_reason, usage };
+  const { reasoningContent, content, finish_reason, usage } = getResponseData();
+
+  return {
+    reasoningContent,
+    answer: content,
+    toolCalls: toolCalls.filter(Boolean),
+    finish_reason,
+    usage
+  };
 }
@@ -556,30 +556,21 @@ async function streamResponse({
     res,
     readStream: stream
   });
-  let answer = '';
-  let reasoning = '';
-  let finish_reason: CompletionFinishReason = null;
-  let usage: CompletionUsage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
-
     if (res.closed) {
       stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
       break;
     }

-    const { reasoningContent, content, responseContent, finishReason } = parsePart({
+    const { reasoningContent, responseContent } = parsePart({
       part,
       parseThinkTag,
       retainDatasetCite
     });
-    finish_reason = finish_reason || finishReason;
-    answer += content;
-    reasoning += reasoningContent;

     if (aiChatReasoning && reasoningContent) {
       workflowStreamResponse?.({
@@ -602,5 +593,7 @@ async function streamResponse({
     }
   }

+  const { reasoningContent: reasoning, content: answer, finish_reason, usage } = getResponseData();
+
   return { answer, reasoning, finish_reason, usage };
 }
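Together with the `reasoning_text` fields added in the toolChoice hunks, this closes item 3 of the changelog: tool-call mode now carries the captured reasoning back into the message history instead of discarding it. The persisted assistant message takes roughly this shape (the values are placeholders; `reasoning_text` is a FastGPT extension, which is why the declarations above widen from `ChatCompletionAssistantMessageParam` to `ChatCompletionMessageParam`):

// Placeholder values; reasoning_text is FastGPT's extension to the assistant message.
const gptAssistantResponse = {
  role: 'assistant' as const,
  content: 'final answer text',
  reasoning_text: 'reasoning captured from <think> tags or reasoning_content'
};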