Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-21 03:35:36 +00:00)
fix: stream response (#4853)
@@ -19,4 +19,6 @@ weight: 790
 ## 🐛 Fixes
-1. Score-based sorting was incorrect when doing full-text retrieval across multiple knowledge bases
+1. Score-based sorting was incorrect when doing full-text retrieval across multiple knowledge bases.
+2. Stream responses could capture an incorrect finish_reason.
+3. Tool-call mode did not save the reasoning (thinking) output.
@@ -18,15 +18,17 @@ import json5 from 'json5';
 */
 export const computedMaxToken = ({
   maxToken,
-  model
+  model,
+  min
 }: {
   maxToken?: number;
   model: LLMModelItemType;
+  min?: number;
 }) => {
   if (maxToken === undefined) return;

   maxToken = Math.min(maxToken, model.maxResponse);
-  return maxToken;
+  return Math.max(maxToken, min || 0);
 };

 // FastGPT temperature range: [0,10], ai temperature:[0,2],{0,1]……
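computedMaxToken now accepts an optional min and applies it after clamping to the model ceiling, which is how the tool runners later in this commit guarantee a usable token budget (they pass min: 100). A minimal standalone sketch of the fixed helper, with LLMModelItemType reduced to the single field the function reads (a simplification for illustration, not the real type):

// Sketch only: the real LLMModelItemType has more fields; maxResponse is the one used here.
type LLMModelItemTypeLike = { maxResponse: number };

export const computedMaxTokenSketch = ({
  maxToken,
  model,
  min
}: {
  maxToken?: number;
  model: LLMModelItemTypeLike;
  min?: number;
}) => {
  // undefined means "let the provider decide", so it is passed through untouched
  if (maxToken === undefined) return;

  // clamp to the model ceiling first, then enforce the optional floor
  const clamped = Math.min(maxToken, model.maxResponse);
  return Math.max(clamped, min || 0);
};

With the old body, a very small configured maxToken survived the clamp unchanged and could leave a tool-call request with almost no output budget; the added floor makes min the effective lower bound.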
@@ -178,7 +180,7 @@ export const llmStreamResponseToAnswerText = async (
     }
   }
   return {
-    text: parseReasoningContent(answer)[1],
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
     usage,
     toolCalls
   };
@@ -192,8 +194,9 @@ export const llmUnStreamResponseToAnswerText = async (
 }> => {
   const answer = response.choices?.[0]?.message?.content || '';
+  const toolCalls = response.choices?.[0]?.message?.tool_calls;

   return {
-    text: answer,
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
     usage: response.usage,
     toolCalls
   };
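Both answer paths now share the same post-processing: parseReasoningContent(answer)[1] drops an inline <think> block (index 1 holds the remaining answer text), and removeDatasetCiteText(text, false) strips dataset citation markers. A hedged sketch of that pipeline, with the helper signatures inferred from how the hunks call them rather than copied from the real module:

// Shapes inferred from usage in this commit; declared here only so the sketch type-checks.
declare function parseReasoningContent(text: string): [reasoning: string, answer: string];
declare function removeDatasetCiteText(text: string, retainDatasetCite: boolean): string;

const toAnswerText = (raw: string): string => {
  // [0] is the extracted reasoning, [1] is the visible answer
  const answer = parseReasoningContent(raw)[1];
  // false: do not keep dataset citation markers such as [Object](CITE)
  return removeDatasetCiteText(answer, false);
};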
@@ -240,6 +243,12 @@ export const parseLLMStreamResponse = () => {
   let citeBuffer = '';
   const maxCiteBufferLength = 32; // [Object](CITE) is 32 characters in total

+  // Buffer
+  let buffer_finishReason: CompletionFinishReason = null;
+  let buffer_usage: CompletionUsage = getLLMDefaultUsage();
+  let buffer_reasoningContent = '';
+  let buffer_content = '';
+
   /*
     parseThinkTag - only controls whether to actively parse <think></think>; if the API has already parsed it, do not parse it again.
     retainDatasetCite -
@@ -257,6 +266,7 @@ export const parseLLMStreamResponse = () => {
       };
+      finish_reason?: CompletionFinishReason;
     }[];
     usage?: CompletionUsage;
   };
   parseThinkTag?: boolean;
   retainDatasetCite?: boolean;
@@ -266,26 +276,25 @@ export const parseLLMStreamResponse = () => {
     responseContent: string;
     finishReason: CompletionFinishReason;
   } => {
     const data = (() => {
+      buffer_usage = part.usage || buffer_usage;
+
       const finishReason = part.choices?.[0]?.finish_reason || null;
+      buffer_finishReason = finishReason || buffer_finishReason;
+
       const content = part.choices?.[0]?.delta?.content || '';
       // @ts-ignore
       const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
-      const isStreamEnd = !!finishReason;
+      const isStreamEnd = !!buffer_finishReason;

       // Parse think
-      const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } = (() => {
+      const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } =
+        (() => {
           if (reasoningContent || !parseThinkTag) {
             isInThinkTag = false;
             return { reasoningContent, content };
           }

           if (!content) {
             return {
               reasoningContent: '',
               content: ''
             };
           }

           // If we are not inside a think tag, or reasoningContent is present (already parsed by the API), return reasoningContent and content
           if (isInThinkTag === false) {
             return {
@@ -402,7 +411,7 @@ export const parseLLMStreamResponse = () => {
         reasoningContent: parsedThinkReasoningContent,
         content: parsedThinkContent,
         responseContent: parsedThinkContent,
-        finishReason
+        finishReason: buffer_finishReason
       };
     }
@@ -455,11 +464,32 @@ export const parseLLMStreamResponse = () => {
         reasoningContent: parsedThinkReasoningContent,
         content: parsedThinkContent,
         responseContent: pasedCiteContent,
-        finishReason
+        finishReason: buffer_finishReason
       };
     })();

+    buffer_reasoningContent += data.reasoningContent;
+    buffer_content += data.content;
+
     return data;
   };

+  const getResponseData = () => {
+    return {
+      finish_reason: buffer_finishReason,
+      usage: buffer_usage,
+      reasoningContent: buffer_reasoningContent,
+      content: buffer_content
+    };
+  };
+
+  const updateFinishReason = (finishReason: CompletionFinishReason) => {
+    buffer_finishReason = finishReason;
+  };
+
   return {
-    parsePart
+    parsePart,
+    getResponseData,
+    updateFinishReason
   };
 };
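The factory now keeps the aggregate state (usage, finish_reason, reasoning and content) in the buffer_* variables, so a stream consumer feeds every chunk to parsePart, can force a reason with updateFinishReason (for example 'close' when the client disconnects), and reads the final totals once from getResponseData. A hedged sketch of that consumption pattern, with the chunk type left loose because only the fields used by parsePart matter here:

// Sketch of the caller side; parseLLMStreamResponse is the factory from the hunk above.
async function collectStream(
  stream: AsyncIterable<any>, // OpenAI-style delta chunks
  clientClosed: () => boolean
) {
  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

  for await (const part of stream) {
    if (clientClosed()) {
      // record why the loop stopped; the buffered value is what getResponseData reports
      updateFinishReason('close');
      break;
    }

    const { responseContent } = parsePart({ part, parseThinkTag: true, retainDatasetCite: false });
    if (responseContent) {
      // forward responseContent to the SSE writer here
    }
  }

  // one read of the aggregated result replaces the per-function usage/finish_reason accumulators
  const { content, reasoningContent, finish_reason, usage } = getResponseData();
  return { answer: content, reasoning: reasoningContent, finish_reason, usage };
}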
@@ -1,13 +1,14 @@
 import { createChatCompletion } from '../../../../ai/config';
 import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
-import {
-  type ChatCompletion,
-  type StreamChatType,
-  type ChatCompletionMessageParam,
-  type ChatCompletionCreateParams,
-  type ChatCompletionMessageFunctionCall,
-  type ChatCompletionFunctionMessageParam,
-  type ChatCompletionAssistantMessageParam
+import type {
+  ChatCompletion,
+  StreamChatType,
+  ChatCompletionMessageParam,
+  ChatCompletionCreateParams,
+  ChatCompletionMessageFunctionCall,
+  ChatCompletionFunctionMessageParam,
+  ChatCompletionAssistantMessageParam,
+  CompletionFinishReason
 } from '@fastgpt/global/core/ai/type.d';
 import { type NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
@@ -259,14 +260,15 @@ export const runToolWithFunctionCall = async (
     }
   });

-  let { answer, functionCalls, inputTokens, outputTokens } = await (async () => {
+  let { answer, functionCalls, inputTokens, outputTokens, finish_reason } = await (async () => {
     if (isStreamResponse) {
       if (!res || res.closed) {
         return {
           answer: '',
           functionCalls: [],
           inputTokens: 0,
-          outputTokens: 0
+          outputTokens: 0,
+          finish_reason: 'close' as const
         };
       }
       const result = await streamResponse({
@@ -281,10 +283,12 @@ export const runToolWithFunctionCall = async (
         answer: result.answer,
         functionCalls: result.functionCalls,
         inputTokens: result.usage.prompt_tokens,
-        outputTokens: result.usage.completion_tokens
+        outputTokens: result.usage.completion_tokens,
+        finish_reason: result.finish_reason
       };
     } else {
       const result = aiResponse as ChatCompletion;
+      const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
       const function_call = result.choices?.[0]?.message?.function_call;
       const usage = result.usage;
@@ -315,7 +319,8 @@ export const runToolWithFunctionCall = async (
         answer,
         functionCalls: toolCalls,
         inputTokens: usage?.prompt_tokens,
-        outputTokens: usage?.completion_tokens
+        outputTokens: usage?.completion_tokens,
+        finish_reason
       };
     }
   })();
@@ -481,7 +486,8 @@ export const runToolWithFunctionCall = async (
         completeMessages,
         assistantResponses: toolNodeAssistants,
         runTimes,
-        toolWorkflowInteractiveResponse
+        toolWorkflowInteractiveResponse,
+        finish_reason
       };
     }
@@ -495,7 +501,8 @@ export const runToolWithFunctionCall = async (
         toolNodeInputTokens,
         toolNodeOutputTokens,
         assistantResponses: toolNodeAssistants,
-        runTimes
+        runTimes,
+        finish_reason
       }
     );
   } else {
@@ -523,7 +530,8 @@ export const runToolWithFunctionCall = async (
         : outputTokens,
       completeMessages,
       assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
-      runTimes: (response?.runTimes || 0) + 1
+      runTimes: (response?.runTimes || 0) + 1,
+      finish_reason
     };
   }
 };
@@ -546,28 +554,25 @@ async function streamResponse({
     readStream: stream
   });

-  let textAnswer = '';
   let functionCalls: ChatCompletionMessageFunctionCall[] = [];
   let functionId = getNanoid();
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
+      updateFinishReason('close');
       break;
     }

-    const { content: toolChoiceContent, responseContent } = parsePart({
+    const { responseContent } = parsePart({
       part,
       parseThinkTag: false,
       retainDatasetCite
     });

     const responseChoice = part.choices?.[0]?.delta;
-    textAnswer += toolChoiceContent;

     if (responseContent) {
       workflowStreamResponse?.({
@@ -577,7 +582,7 @@ async function streamResponse({
           text: responseContent
         })
       });
-    } else if (responseChoice.function_call) {
+    } else if (responseChoice?.function_call) {
       const functionCall: {
         arguments?: string;
         name?: string;
@@ -640,5 +645,7 @@ async function streamResponse({
     }
   }

-  return { answer: textAnswer, functionCalls, usage };
+  const { content, finish_reason, usage } = getResponseData();
+
+  return { answer: content, functionCalls, finish_reason, usage };
 }
@@ -220,7 +220,8 @@ export const runToolWithPromptCall = async (
   const max_tokens = computedMaxToken({
     model: toolModel,
-    maxToken
+    maxToken,
+    min: 100
   });
   const filterMessages = await filterGPTMessageByMaxContext({
     messages,
@@ -592,28 +593,22 @@ async function streamResponse({
   let startResponseWrite = false;
   let answer = '';
   let reasoning = '';
-  let finish_reason: CompletionFinishReason = null;
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
       break;
     }

-    const { reasoningContent, content, responseContent, finishReason } = parsePart({
+    const { reasoningContent, content, responseContent } = parsePart({
       part,
       parseThinkTag: aiChatReasoning,
       retainDatasetCite
     });
-    finish_reason = finish_reason || finishReason;
     answer += content;
     reasoning += reasoningContent;

     // Reasoning response
     if (aiChatReasoning && reasoningContent) {
@@ -658,7 +653,9 @@ async function streamResponse({
     }
   }

-  return { answer, reasoning, finish_reason, usage };
+  const { reasoningContent, content, finish_reason, usage } = getResponseData();
+
+  return { answer: content, reasoning: reasoningContent, finish_reason, usage };
 }

 const parseAnswer = (
@@ -7,17 +7,13 @@ import {
   type ChatCompletionToolMessageParam,
   type ChatCompletionMessageParam,
   type ChatCompletionTool,
   type ChatCompletionAssistantMessageParam,
   type CompletionFinishReason
 } from '@fastgpt/global/core/ai/type';
 import { type NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
 import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
 import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
-import {
-  ChatCompletionRequestMessageRoleEnum,
-  getLLMDefaultUsage
-} from '@fastgpt/global/core/ai/constants';
+import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
 import { dispatchWorkFlow } from '../../index';
 import {
   type DispatchToolModuleProps,
@@ -254,7 +250,8 @@ export const runToolWithToolChoice = async (
   const max_tokens = computedMaxToken({
     model: toolModel,
-    maxToken
+    maxToken,
+    min: 100
   });

   // Filter histories by maxToken
@@ -319,10 +316,12 @@ export const runToolWithToolChoice = async (
     }
   });

-  let { answer, toolCalls, finish_reason, inputTokens, outputTokens } = await (async () => {
+  let { reasoningContent, answer, toolCalls, finish_reason, inputTokens, outputTokens } =
+    await (async () => {
       if (isStreamResponse) {
         if (!res || res.closed) {
           return {
+            reasoningContent: '',
             answer: '',
             toolCalls: [],
             finish_reason: 'close' as const,
@@ -341,6 +340,7 @@ export const runToolWithToolChoice = async (
         });

         return {
+          reasoningContent: result.reasoningContent,
           answer: result.answer,
           toolCalls: result.toolCalls,
           finish_reason: result.finish_reason,
@@ -401,6 +401,7 @@ export const runToolWithToolChoice = async (
       }

       return {
+        reasoningContent: (reasoningContent as string) || '',
         answer,
         toolCalls: toolCalls,
         finish_reason,
@@ -409,7 +410,7 @@ export const runToolWithToolChoice = async (
       };
     }
   })();
-  if (!answer && toolCalls.length === 0) {
+  if (!answer && !reasoningContent && toolCalls.length === 0) {
     return Promise.reject(getEmptyResponseTip());
   }
@@ -501,12 +502,13 @@ export const runToolWithToolChoice = async (
   if (toolCalls.length > 0) {
     // Run the tool, combine its results, and perform another round of AI calls
-    const assistantToolMsgParams: ChatCompletionAssistantMessageParam[] = [
-      ...(answer
+    const assistantToolMsgParams: ChatCompletionMessageParam[] = [
+      ...(answer || reasoningContent
         ? [
             {
               role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
-              content: answer
+              content: answer,
+              reasoning_text: reasoningContent
             }
           ]
         : []),
@@ -627,9 +629,10 @@ export const runToolWithToolChoice = async (
     );
   } else {
     // No tool is invoked, indicating that the process is over
-    const gptAssistantResponse: ChatCompletionAssistantMessageParam = {
+    const gptAssistantResponse: ChatCompletionMessageParam = {
       role: ChatCompletionRequestMessageRoleEnum.Assistant,
-      content: answer
+      content: answer,
+      reasoning_text: reasoningContent
     };
     const completeMessages = filterMessages.concat(gptAssistantResponse);
     inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools));
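This is fix 3 from the changelog: whether another tool round follows or the run ends here, the assistant message now carries the model's reasoning next to the visible answer through a reasoning_text field, and the variable is typed as ChatCompletionMessageParam so the extra field is accepted. A reduced sketch of the shape being built, with field names taken from the hunks and the type narrowed for illustration:

// Narrowed stand-in for ChatCompletionMessageParam; only the fields used above are shown.
type AssistantMsgLike = {
  role: 'assistant';
  content: string;
  reasoning_text?: string; // preserved thinking output
};

const buildAssistantMessage = (answer: string, reasoningContent: string): AssistantMsgLike => ({
  role: 'assistant',
  content: answer,
  reasoning_text: reasoningContent
});

In the tool-call branch the message is only appended when answer || reasoningContent is non-empty, mirroring the spread in the hunk above.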
@@ -671,34 +674,23 @@ async function streamResponse({
     readStream: stream
   });

-  let textAnswer = '';
   let callingTool: { name: string; arguments: string } | null = null;
   let toolCalls: ChatCompletionMessageToolCall[] = [];
-  let finish_reason: CompletionFinishReason = null;
-  let usage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
       break;
     }

-    const {
-      reasoningContent,
-      content: toolChoiceContent,
-      responseContent,
-      finishReason
-    } = parsePart({
+    const { reasoningContent, responseContent } = parsePart({
       part,
       parseThinkTag: true,
       retainDatasetCite
     });
-    textAnswer += toolChoiceContent;
-    finish_reason = finishReason || finish_reason;

     const responseChoice = part.choices?.[0]?.delta;
@@ -800,5 +792,13 @@ async function streamResponse({
     }
   }

-  return { answer: textAnswer, toolCalls: toolCalls.filter(Boolean), finish_reason, usage };
+  const { reasoningContent, content, finish_reason, usage } = getResponseData();
+
+  return {
+    reasoningContent,
+    answer: content,
+    toolCalls: toolCalls.filter(Boolean),
+    finish_reason,
+    usage
+  };
 }
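streamResponse for the tool-choice runner now reports reasoningContent as part of its result, the outer destructure in runToolWithToolChoice (earlier hunk) consumes it, and the empty-response guard was widened the same way so a reasoning-only completion is no longer rejected. A hedged sketch of the new contract, with the field types simplified to what the diff shows:

// Simplified result shape; the real code uses CompletionFinishReason and CompletionUsage.
type ToolChoiceStreamResult = {
  reasoningContent: string;
  answer: string;
  toolCalls: unknown[];
  finish_reason: string | null;
  usage: { prompt_tokens: number; completion_tokens: number };
};

// Mirrors the updated guard: a reply that only contains reasoning is not "empty" anymore.
const isEmptyResponse = (r: ToolChoiceStreamResult) =>
  !r.answer && !r.reasoningContent && r.toolCalls.length === 0;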
@@ -556,30 +556,21 @@ async function streamResponse({
     res,
     readStream: stream
   });
-  let answer = '';
-  let reasoning = '';
-  let finish_reason: CompletionFinishReason = null;
-  let usage: CompletionUsage = getLLMDefaultUsage();

-  const { parsePart } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

   for await (const part of stream) {
-    usage = part.usage || usage;
-
     if (res.closed) {
       stream.controller?.abort();
-      finish_reason = 'close';
+      updateFinishReason('close');
       break;
     }

-    const { reasoningContent, content, responseContent, finishReason } = parsePart({
+    const { reasoningContent, responseContent } = parsePart({
       part,
       parseThinkTag,
       retainDatasetCite
     });
-    finish_reason = finish_reason || finishReason;
-    answer += content;
-    reasoning += reasoningContent;

     if (aiChatReasoning && reasoningContent) {
       workflowStreamResponse?.({
@@ -602,5 +593,7 @@ async function streamResponse({
     }
   }

+  const { reasoningContent: reasoning, content: answer, finish_reason, usage } = getResponseData();
+
   return { answer, reasoning, finish_reason, usage };
 }