Mirror of https://github.com/labring/FastGPT.git (synced 2026-02-27 01:02:22 +08:00)
perf: request llm (#6191)
* perf: request error info
* perf: request llm
* perf: request llm
* openapi doc
@@ -41,6 +41,7 @@ export enum EmbeddingTypeEnm {
 }
 
 export const completionFinishReasonMap = {
+  error: i18nT('chat:completion_finish_error'),
   close: i18nT('chat:completion_finish_close'),
   stop: i18nT('chat:completion_finish_stop'),
   length: i18nT('chat:completion_finish_length'),
packages/global/core/ai/type.d.ts (vendored, 9 changes)
@@ -1,5 +1,6 @@
 import openai from 'openai';
 import type {
   ChatCompletion as SdkChatCompletion,
+  ChatCompletionMessageToolCall,
   ChatCompletionMessageParam as SdkChatCompletionMessageParam,
   ChatCompletionToolMessageParam,
@@ -70,10 +71,16 @@ export type ChatCompletionMessageFunctionCall =
 };
 
 // Stream response
-export type StreamChatType = Stream<openai.Chat.Completions.ChatCompletionChunk>;
+export type StreamChatType = Stream<openai.Chat.Completions.ChatCompletionChunk & { error?: any }>;
 export type UnStreamChatType = openai.Chat.Completions.ChatCompletion;
 
+// UnStream response
+export type ChatCompletion = SdkChatCompletion & {
+  error?: any;
+};
+
 export type CompletionFinishReason =
+  | 'error'
   | 'close'
   | 'stop'
   | 'length'
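The widened types let downstream code read a request failure off the response object instead of relying on a thrown exception. A minimal sketch of a consumer, assuming a `stream` value of the new StreamChatType (the variable names are hypothetical):

  for await (const chunk of stream) {
    // `error` is only present when the upstream provider injects an error
    // payload into the stream instead of closing the connection.
    if (chunk.error) break;
    process.stdout.write(chunk.choices?.[0]?.delta?.content ?? '');
  }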
@@ -2,6 +2,31 @@ import { OutLinkChatAuthSchema } from '../../../../support/permission/chat';
 import { ObjectIdSchema } from '../../../../common/type/mongo';
 import z from 'zod';
 
+/* Init */
+// Online chat
+export const InitChatQuerySchema = z
+  .object({
+    appId: ObjectIdSchema.describe('应用ID'),
+    chatId: z.string().min(1).describe('对话ID'),
+    loadCustomFeedbacks: z.boolean().optional().describe('是否加载自定义反馈')
+  })
+  .meta({
+    example: {
+      appId: '1234567890',
+      chatId: '1234567890',
+      loadCustomFeedbacks: true
+    }
+  });
+export type InitChatQueryType = z.infer<typeof InitChatQuerySchema>;
+export const InitChatResponseSchema = z.object({
+  chatId: z.string().min(1).describe('对话ID'),
+  appId: ObjectIdSchema.describe('应用ID'),
+  userAvatar: z.string().optional().describe('用户头像'),
+  title: z.string().min(1).describe('对话标题'),
+  variables: z.record(z.string(), z.any()).optional().describe('全局变量值'),
+  app: z.object({}).describe('应用配置')
+});
+
 /* ============ v2/chat/stop ============ */
 export const StopV2ChatSchema = z
   .object({
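Standard zod usage applies to these schemas; a minimal sketch of validating an incoming query (the input values are hypothetical):

  const parsed = InitChatQuerySchema.safeParse({
    appId: '686f2a4f8f3e4c0012a1b2c3', // hypothetical 24-char ObjectId
    chatId: 'chat-001',
    loadCustomFeedbacks: true
  });

  if (parsed.success) {
    const query: InitChatQueryType = parsed.data; // fully typed
  } else {
    console.error(parsed.error.issues); // per-field validation errors
  }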
@@ -28,7 +28,13 @@ export const openAPIDocument = createDocument({
   },
   {
     name: '对话管理',
-    tags: [TagsMap.chatHistory, TagsMap.chatPage, TagsMap.chatFeedback, TagsMap.chatSetting]
+    tags: [
+      TagsMap.chatPage,
+      TagsMap.chatHistory,
+      TagsMap.chatController,
+      TagsMap.chatFeedback,
+      TagsMap.chatSetting
+    ]
   },
   {
     name: '知识库',
@@ -6,11 +6,11 @@ export const TagsMap = {
   appCommon: 'Agent 管理',
 
   // Chat - home
-  chatPage: '对话页',
-  chatController: '对话框操作',
-  chatHistory: '对话历史管理',
-  chatSetting: '门户页配置',
+  chatPage: '对话页面通用',
+  chatHistory: '历史记录管理',
+  chatController: '对话操作',
+  chatFeedback: '对话反馈',
+  chatSetting: '门户页配置',
 
   // Dataset
   datasetCollection: '集合',
@@ -55,6 +55,7 @@ type RunAgentCallProps = {
 } & ResponseEvents;
 
 type RunAgentResponse = {
+  error?: any;
   completeMessages: ChatCompletionMessageParam[]; // Step request complete messages
   assistantMessages: ChatCompletionMessageParam[]; // Step assistant response messages
   interactiveResponse?: ToolCallChildrenInteractive;
@@ -134,6 +135,7 @@ export const runAgentCall = async ({
   let inputTokens: number = 0;
   let outputTokens: number = 0;
   let finish_reason: CompletionFinishReason | undefined;
+  let requestError: any;
   const subAppUsages: ChatNodeUsageType[] = [];
 
   // Handle interactions inside tools
@@ -213,8 +215,10 @@ export const runAgentCall = async ({
       usage,
       responseEmptyTip,
       assistantMessage: llmAssistantMessage,
-      finish_reason: finishReason
+      finish_reason: finishReason,
+      error
     } = await createLLMResponse({
+      throwError: false,
       body: {
         ...body,
         max_tokens: maxTokens,
@@ -234,7 +238,11 @@ export const runAgentCall = async ({
     });
 
     finish_reason = finishReason;
+    requestError = error;
+
+    if (requestError) {
+      break;
+    }
     if (responseEmptyTip) {
       return Promise.reject(responseEmptyTip);
     }
@@ -303,6 +311,7 @@ export const runAgentCall = async ({
   }
 
   return {
+    error: requestError,
    inputTokens,
    outputTokens,
    subAppUsages,
@@ -39,6 +39,7 @@ export type ResponseEvents = {
 };
 
 export type CreateLLMResponseProps<T extends CompletionsBodyType = CompletionsBodyType> = {
+  throwError?: boolean;
   userKey?: OpenaiAccountType;
   body: LLMRequestBodyType<T>;
   isAborted?: () => boolean | undefined;
@@ -46,6 +47,7 @@ export type CreateLLMResponseProps<T extends CompletionsBodyType = CompletionsBo
 } & ResponseEvents;
 
 type LLMResponse = {
+  error?: any;
   isStreamResponse: boolean;
   answerText: string;
   reasoningText: string;
@@ -69,7 +71,7 @@ type LLMResponse = {
 export const createLLMResponse = async <T extends CompletionsBodyType>(
   args: CreateLLMResponseProps<T>
 ): Promise<LLMResponse> => {
-  const { body, custonHeaders, userKey } = args;
+  const { throwError = true, body, custonHeaders, userKey } = args;
   const { messages, useVision, requestOrigin, tools, toolCallMode } = body;
 
   // Messages process
@@ -104,7 +106,7 @@ export const createLLMResponse = async <T extends CompletionsBodyType>(
     }
   });
 
-  const { answerText, reasoningText, toolCalls, finish_reason, usage } = await (async () => {
+  let { answerText, reasoningText, toolCalls, finish_reason, usage, error } = await (async () => {
     if (isStreamResponse) {
       return createStreamResponse({
         response,
@@ -151,6 +153,14 @@ export const createLLMResponse = async <T extends CompletionsBodyType>(
     usage?.prompt_tokens || (await countGptMessagesTokens(requestBody.messages, requestBody.tools));
   const outputTokens = usage?.completion_tokens || (await countGptMessagesTokens(assistantMessage));
 
+  if (error) {
+    finish_reason = 'error';
+
+    if (throwError) {
+      throw error;
+    }
+  }
+
   const getEmptyResponseTip = () => {
     if (userKey?.baseUrl) {
       addLog.warn(`User LLM response empty`, {
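With the new flag, callers choose between the old throwing behaviour and receiving the failure as data. A minimal sketch of the opt-out path, assuming a prepared `body` (hypothetical):

  const { error, finish_reason, usage, answerText } = await createLLMResponse({
    throwError: false, // do not throw; surface the failure in the result
    body
  });

  if (error) {
    // finish_reason is forced to 'error' and, per the usage hunk below,
    // inputTokens/outputTokens are zeroed so the failed call is not billed.
  }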
@@ -172,10 +182,12 @@ export const createLLMResponse = async <T extends CompletionsBodyType>(
     !answerText &&
     !reasoningText &&
     !toolCalls?.length &&
+    !error &&
     (finish_reason === 'stop' || !finish_reason);
   const responseEmptyTip = isNotResponse ? getEmptyResponseTip() : undefined;
 
   return {
+    error,
     isStreamResponse,
     responseEmptyTip,
     answerText,
@@ -183,8 +195,8 @@ export const createLLMResponse = async <T extends CompletionsBodyType>(
     toolCalls,
     finish_reason,
     usage: {
-      inputTokens,
-      outputTokens
+      inputTokens: error ? 0 : inputTokens,
+      outputTokens: error ? 0 : outputTokens
     },
 
     requestMessages,
@@ -200,6 +212,7 @@ type CompleteResponse = Pick<
   'answerText' | 'reasoningText' | 'toolCalls' | 'finish_reason'
 > & {
   usage?: CompletionUsage;
+  error?: any;
 };
 
 export const createStreamResponse = async ({
@@ -217,13 +230,174 @@ export const createStreamResponse = async ({
   const { retainDatasetCite = true, tools, toolCallMode = 'toolChoice', model } = body;
   const modelData = getLLMModel(model);
 
-  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();
+  const { parsePart, getResponseData, updateFinishReason, updateError } = parseLLMStreamResponse();
 
   if (tools?.length) {
     if (toolCallMode === 'toolChoice') {
       let callingTool: ChatCompletionMessageToolCall['function'] | null = null;
       const toolCalls: ChatCompletionMessageToolCall[] = [];
 
+      try {
         for await (const part of response) {
           if (isAborted?.()) {
             response.controller?.abort();
+            updateFinishReason('close');
+            break;
+          }
+
+          const { reasoningContent, responseContent } = parsePart({
+            part,
+            parseThinkTag: modelData.reasoning,
+            retainDatasetCite
+          });
+
+          if (reasoningContent) {
+            onReasoning?.({ text: reasoningContent });
+          }
+          if (responseContent) {
+            onStreaming?.({ text: responseContent });
+          }
+
+          const responseChoice = part.choices?.[0]?.delta;
+
+          // Parse tool calls
+          if (responseChoice?.tool_calls?.length) {
+            responseChoice.tool_calls.forEach((toolCall, i) => {
+              const index = toolCall.index ?? i;
+
+              // Call new tool
+              const hasNewTool = toolCall?.function?.name || callingTool;
+              if (hasNewTool) {
+                // Call new tool
+                if (toolCall?.function?.name) {
+                  callingTool = {
+                    name: toolCall.function?.name || '',
+                    arguments: toolCall.function?.arguments || ''
+                  };
+                } else if (callingTool) {
+                  // Continue call (perhaps the name of the previous function was incomplete)
+                  callingTool.name += toolCall.function?.name || '';
+                  callingTool.arguments += toolCall.function?.arguments || '';
+                }
+
+                // New tool, add to list.
+                if (tools.find((item) => item.function.name === callingTool!.name)) {
+                  const call: ChatCompletionMessageToolCall = {
+                    id: getNanoid(),
+                    type: 'function',
+                    function: callingTool!
+                  };
+                  toolCalls[index] = call;
+                  onToolCall?.({ call });
+                  callingTool = null;
+                }
+              } else {
+                /* Append arg to the current tool's arguments */
+                const arg: string = toolCall?.function?.arguments ?? '';
+                const currentTool = toolCalls[index];
+                if (currentTool && arg) {
+                  currentTool.function.arguments += arg;
+
+                  onToolParam?.({ tool: currentTool, params: arg });
+                }
+              }
+            });
+          }
+        }
+      } catch (error: any) {
+        updateError(error?.error || error);
+      }
+
+      const { reasoningContent, content, finish_reason, usage, error } = getResponseData();
+
+      return {
+        error,
+        answerText: content,
+        reasoningText: reasoningContent,
+        finish_reason,
+        usage,
+        toolCalls: toolCalls.filter((call) => !!call)
+      };
+    } else {
+      let startResponseWrite = false;
+      let answer = '';
+
+      try {
+        for await (const part of response) {
+          if (isAborted?.()) {
+            response.controller?.abort();
+            updateFinishReason('close');
+            break;
+          }
+
+          const { reasoningContent, content, responseContent } = parsePart({
+            part,
+            parseThinkTag: modelData.reasoning,
+            retainDatasetCite
+          });
+          answer += content;
+
+          if (reasoningContent) {
+            onReasoning?.({ text: reasoningContent });
+          }
+
+          if (content) {
+            if (startResponseWrite) {
+              if (responseContent) {
+                onStreaming?.({ text: responseContent });
+              }
+            } else if (answer.length >= 3) {
+              answer = answer.trimStart();
+
+              // Not call tool
+              if (/0(:|:)/.test(answer)) {
+                startResponseWrite = true;
+
+                // find first : index
+                const firstIndex =
+                  answer.indexOf('0:') !== -1 ? answer.indexOf('0:') : answer.indexOf('0:');
+                answer = answer.substring(firstIndex + 2).trim();
+
+                onStreaming?.({ text: answer });
+              }
+              // Not response tool
+              else if (/1(:|:)/.test(answer)) {
+              }
+              // Not start 1/0, start response
+              else {
+                startResponseWrite = true;
+                onStreaming?.({ text: answer });
+              }
+            }
+          }
+        }
+      } catch (error: any) {
+        updateError(error?.error || error);
+      }
+
+      const { reasoningContent, content, finish_reason, usage, error } = getResponseData();
+      const { answer: llmAnswer, streamAnswer, toolCalls } = parsePromptToolCall(content);
+
+      if (streamAnswer) {
+        onStreaming?.({ text: streamAnswer });
+      }
+
+      toolCalls?.forEach((call) => {
+        onToolCall?.({ call });
+      });
+
+      return {
+        error,
+        answerText: llmAnswer,
+        reasoningText: reasoningContent,
+        finish_reason,
+        usage,
+        toolCalls
+      };
+    }
+  } else {
+    // Not use tool
+    try {
+      for await (const part of response) {
+        if (isAborted?.()) {
+          response.controller?.abort();
@@ -243,161 +417,15 @@ export const createStreamResponse = async ({
         if (responseContent) {
           onStreaming?.({ text: responseContent });
         }
-
-        const responseChoice = part.choices?.[0]?.delta;
-
-        // Parse tool calls
-        if (responseChoice?.tool_calls?.length) {
-          responseChoice.tool_calls.forEach((toolCall, i) => {
-            const index = toolCall.index ?? i;
-
-            // Call new tool
-            const hasNewTool = toolCall?.function?.name || callingTool;
-            if (hasNewTool) {
-              // Call new tool
-              if (toolCall?.function?.name) {
-                callingTool = {
-                  name: toolCall.function?.name || '',
-                  arguments: toolCall.function?.arguments || ''
-                };
-              } else if (callingTool) {
-                // Continue call (perhaps the name of the previous function was incomplete)
-                callingTool.name += toolCall.function?.name || '';
-                callingTool.arguments += toolCall.function?.arguments || '';
-              }
-
-              // New tool, add to list.
-              if (tools.find((item) => item.function.name === callingTool!.name)) {
-                const call: ChatCompletionMessageToolCall = {
-                  id: getNanoid(),
-                  type: 'function',
-                  function: callingTool!
-                };
-                toolCalls[index] = call;
-                onToolCall?.({ call });
-                callingTool = null;
-              }
-            } else {
-              /* Append arg to the current tool's arguments */
-              const arg: string = toolCall?.function?.arguments ?? '';
-              const currentTool = toolCalls[index];
-              if (currentTool && arg) {
-                currentTool.function.arguments += arg;
-
-                onToolParam?.({ tool: currentTool, params: arg });
-              }
-            }
-          });
-        }
-      }
-
-      const { reasoningContent, content, finish_reason, usage } = getResponseData();
-
-      return {
-        answerText: content,
-        reasoningText: reasoningContent,
-        finish_reason,
-        usage,
-        toolCalls: toolCalls.filter((call) => !!call)
-      };
-    } else {
-      let startResponseWrite = false;
-      let answer = '';
-
-      for await (const part of response) {
-        if (isAborted?.()) {
-          response.controller?.abort();
-          updateFinishReason('close');
-          break;
-        }
-
-        const { reasoningContent, content, responseContent } = parsePart({
-          part,
-          parseThinkTag: modelData.reasoning,
-          retainDatasetCite
-        });
-        answer += content;
-
-        if (reasoningContent) {
-          onReasoning?.({ text: reasoningContent });
-        }
-
-        if (content) {
-          if (startResponseWrite) {
-            if (responseContent) {
-              onStreaming?.({ text: responseContent });
-            }
-          } else if (answer.length >= 3) {
-            answer = answer.trimStart();
-
-            // Not call tool
-            if (/0(:|:)/.test(answer)) {
-              startResponseWrite = true;
-
-              // find first : index
-              const firstIndex =
-                answer.indexOf('0:') !== -1 ? answer.indexOf('0:') : answer.indexOf('0:');
-              answer = answer.substring(firstIndex + 2).trim();
-
-              onStreaming?.({ text: answer });
-            }
-            // Not response tool
-            else if (/1(:|:)/.test(answer)) {
-            }
-            // Not start 1/0, start response
-            else {
-              startResponseWrite = true;
-              onStreaming?.({ text: answer });
-            }
-          }
-        }
-      }
-
-      const { reasoningContent, content, finish_reason, usage } = getResponseData();
-      const { answer: llmAnswer, streamAnswer, toolCalls } = parsePromptToolCall(content);
-
-      if (streamAnswer) {
-        onStreaming?.({ text: streamAnswer });
-      }
-
-      toolCalls?.forEach((call) => {
-        onToolCall?.({ call });
-      });
-
-      return {
-        answerText: llmAnswer,
-        reasoningText: reasoningContent,
-        finish_reason,
-        usage,
-        toolCalls
-      };
-    }
-  } else {
-    // Not use tool
-    for await (const part of response) {
-      if (isAborted?.()) {
-        response.controller?.abort();
-        updateFinishReason('close');
-        break;
-      }
-
-      const { reasoningContent, responseContent } = parsePart({
-        part,
-        parseThinkTag: modelData.reasoning,
-        retainDatasetCite
-      });
-
-      if (reasoningContent) {
-        onReasoning?.({ text: reasoningContent });
-      }
-      if (responseContent) {
-        onStreaming?.({ text: responseContent });
-      }
-    }
+      }
+    } catch (error: any) {
+      updateError(error?.error || error);
+    }
 
-    const { reasoningContent, content, finish_reason, usage } = getResponseData();
+    const { reasoningContent, content, finish_reason, usage, error } = getResponseData();
 
     return {
+      error,
       answerText: content,
       reasoningText: reasoningContent,
       finish_reason,
@@ -479,6 +507,7 @@ export const createCompleteResponse = async ({
   }
 
   return {
+    error: response.error,
    reasoningText: formatReasonContent,
    answerText: formatContent,
    toolCalls,
@@ -580,9 +609,9 @@ const llmCompletionsBodyFormat = async <T extends CompletionsBodyType>({
     })
   } as T;
 
-  // Filter null value
+  // Filter undefined/null value
   requestBody = Object.fromEntries(
-    Object.entries(requestBody).filter(([_, value]) => value !== null)
+    Object.entries(requestBody).filter(([_, value]) => value !== null && value !== undefined)
   ) as T;
 
   // field map
@@ -364,6 +364,9 @@ export const loadRequestMessages = async ({
   const loadMessages = (
     await Promise.all(
       mergeMessages.map(async (item, i) => {
+        delete item.dataId;
+        delete item.hideInUI;
+
         if (item.role === ChatCompletionRequestMessageRoleEnum.System) {
           const content = parseSystemMessage(item.content);
           if (!content) return;
@@ -73,6 +73,7 @@ export const parseLLMStreamResponse = () => {
   let buffer_usage: CompletionUsage = getLLMDefaultUsage();
   let buffer_reasoningContent = '';
   let buffer_content = '';
+  let error: any = undefined;
 
   /*
     parseThinkTag - only controls whether to actively parse <think></think>; if the API has already parsed it, it is not parsed again.

@@ -84,6 +85,7 @@ export const parseLLMStreamResponse = () => {
     retainDatasetCite = true
   }: {
     part: {
+      error?: any;
       choices: {
         delta: {
           content?: string | null;

@@ -96,6 +98,7 @@ export const parseLLMStreamResponse = () => {
     parseThinkTag?: boolean;
     retainDatasetCite?: boolean;
   }): {
+    error?: any;
     reasoningContent: string;
     content: string; // raw content, cite marks retained
    responseContent: string; // response content, cite marks removed

@@ -297,11 +300,14 @@ export const parseLLMStreamResponse = () => {
     buffer_reasoningContent += data.reasoningContent;
     buffer_content += data.content;
 
+    error = part.error || error;
+
     return data;
   };
 
   const getResponseData = () => {
     return {
+      error,
       finish_reason: buffer_finishReason,
       usage: buffer_usage,
       reasoningContent: buffer_reasoningContent,

@@ -312,11 +318,15 @@ export const parseLLMStreamResponse = () => {
   const updateFinishReason = (finishReason: CompletionFinishReason) => {
     buffer_finishReason = finishReason;
   };
+  const updateError = (err: any) => {
+    error = err;
+  };
 
   return {
     parsePart,
     getResponseData,
-    updateFinishReason
+    updateFinishReason,
+    updateError
   };
 };
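The parser is a closure that buffers content, usage, and now errors across chunks. A minimal sketch of the intended call pattern, assuming `response` is the SDK's async-iterable stream:

  const { parsePart, getResponseData, updateFinishReason, updateError } =
    parseLLMStreamResponse();

  try {
    for await (const part of response) {
      const { responseContent } = parsePart({ part, retainDatasetCite: true });
      // forward responseContent to the client here
    }
  } catch (err: any) {
    // Some SDK errors wrap the real payload in an `error` field.
    updateError(err?.error || err);
  }

  // error/finish_reason/usage reflect everything accumulated above.
  const { content, finish_reason, error } = getResponseData();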
@@ -177,47 +177,55 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
 
   const write = res ? responseWriteController({ res, readStream: stream }) : undefined;
 
-  const { completeMessages, reasoningText, answerText, finish_reason, responseEmptyTip, usage } =
-    await createLLMResponse({
-      body: {
-        model: modelConstantsData.model,
-        stream,
-        messages: filterMessages,
-        temperature,
-        max_tokens,
-        top_p: aiChatTopP,
-        stop: aiChatStopSign,
-        response_format: {
-          type: aiChatResponseFormat,
-          json_schema: aiChatJsonSchema
-        },
-        retainDatasetCite,
-        useVision: aiChatVision,
-        requestOrigin
-      },
-      userKey: externalProvider.openaiAccount,
-      isAborted: checkIsStopping,
-      onReasoning({ text }) {
-        if (!aiChatReasoning) return;
-        workflowStreamResponse?.({
-          write,
-          event: SseResponseEventEnum.answer,
-          data: textAdaptGptResponse({
-            reasoning_content: text
-          })
-        });
-      },
-      onStreaming({ text }) {
-        if (!isResponseAnswerText) return;
-        workflowStreamResponse?.({
-          write,
-          event: SseResponseEventEnum.answer,
-          data: textAdaptGptResponse({
-            text
-          })
-        });
-      }
-    });
+  const {
+    completeMessages,
+    reasoningText,
+    answerText,
+    finish_reason,
+    responseEmptyTip,
+    usage,
+    error
+  } = await createLLMResponse({
+    throwError: false,
+    body: {
+      model: modelConstantsData.model,
+      stream,
+      messages: filterMessages,
+      temperature,
+      max_tokens,
+      top_p: aiChatTopP,
+      stop: aiChatStopSign,
+      response_format: {
+        type: aiChatResponseFormat,
+        json_schema: aiChatJsonSchema
+      },
+      retainDatasetCite,
+      useVision: aiChatVision,
+      requestOrigin
+    },
+    userKey: externalProvider.openaiAccount,
+    isAborted: checkIsStopping,
+    onReasoning({ text }) {
+      if (!aiChatReasoning) return;
+      workflowStreamResponse?.({
+        write,
+        event: SseResponseEventEnum.answer,
+        data: textAdaptGptResponse({
+          reasoning_content: text
+        })
+      });
+    },
+    onStreaming({ text }) {
+      if (!isResponseAnswerText) return;
+      workflowStreamResponse?.({
+        write,
+        event: SseResponseEventEnum.answer,
+        data: textAdaptGptResponse({
+          text
+        })
+      });
+    }
+  });
 
   if (responseEmptyTip) {
     return getNodeErrResponse({ error: responseEmptyTip });
@@ -232,6 +240,35 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
 
   const chatCompleteMessages = GPTMessages2Chats({ messages: completeMessages });
 
+  if (error) {
+    return getNodeErrResponse({
+      error,
+      responseData: {
+        totalPoints: points,
+        model: modelName,
+        inputTokens: usage.inputTokens,
+        outputTokens: usage.outputTokens,
+        query: `${userChatInput}`,
+        maxToken: max_tokens,
+        reasoningText,
+        historyPreview: getHistoryPreview(chatCompleteMessages, 10000, aiChatVision),
+        contextTotalLen: completeMessages.length,
+        finishReason: finish_reason
+      },
+      ...(points && {
+        [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
+          {
+            moduleName: name,
+            totalPoints: points,
+            model: modelName,
+            inputTokens: usage.inputTokens,
+            outputTokens: usage.outputTokens
+          }
+        ]
+      })
+    });
+  }
+
   return {
     data: {
       answerText: answerText,
@@ -14,7 +14,6 @@ import { formatModelChars2Points } from '../../../../support/wallet/usage/utils'
 import { type DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
 import { getHandleId } from '@fastgpt/global/core/workflow/utils';
 import { addLog } from '../../../../common/system/log';
 import { ModelTypeEnum } from '../../../../../global/core/ai/model';
 import { createLLMResponse } from '../../../ai/llm/request';
 
 type Props = ModuleDispatchProps<{
@@ -187,7 +187,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
     toolCallOutputTokens,
     completeMessages = [], // The actual message sent to AI (just save text)
     assistantResponses = [], // FastGPT system store assistant.value response
-    finish_reason
+    finish_reason,
+    error
   } = await (async () => {
     const adaptMessages = chats2GPTMessages({
       messages,
@@ -224,6 +225,46 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
   // Preview assistant responses
   const previewAssistantResponses = filterToolResponseToPreview(assistantResponses);
 
+  if (error) {
+    return getNodeErrResponse({
+      error,
+      [DispatchNodeResponseKeyEnum.nodeResponse]: {
+        totalPoints: totalPointsUsage,
+        toolCallInputTokens: toolCallInputTokens,
+        toolCallOutputTokens: toolCallOutputTokens,
+        childTotalPoints: toolTotalPoints,
+        model: modelName,
+        query: userChatInput,
+        historyPreview: getHistoryPreview(
+          GPTMessages2Chats({ messages: completeMessages, reserveTool: false }),
+          10000,
+          useVision
+        ),
+        toolDetail: toolDispatchFlowResponses.map((item) => item.flowResponses).flat(),
+        mergeSignId: nodeId,
+        finishReason: finish_reason
+      },
+      [DispatchNodeResponseKeyEnum.runTimes]: toolDispatchFlowResponses.reduce(
+        (sum, item) => sum + item.runTimes,
+        0
+      ),
+      ...(totalPointsUsage && {
+        [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
+          // Points consumed by the model itself
+          {
+            moduleName: name,
+            model: modelName,
+            totalPoints: modelUsage,
+            inputTokens: toolCallInputTokens,
+            outputTokens: toolCallOutputTokens
+          },
+          // Points consumed by the tools
+          ...toolUsages
+        ]
+      })
+    });
+  }
+
   return {
     data: {
       [NodeOutputKeyEnum.answerText]: previewAssistantResponses
@@ -110,7 +110,8 @@ export const runToolCall = async (props: DispatchToolModuleProps): Promise<RunTo
     completeMessages,
     assistantMessages,
     interactiveResponse,
-    finish_reason
+    finish_reason,
+    error
   } = await runAgentCall({
     maxRunAgentTimes: 50,
     body: {
@@ -310,6 +311,7 @@ export const runToolCall = async (props: DispatchToolModuleProps): Promise<RunTo
     .flat();
 
   return {
+    error,
    toolDispatchFlowResponses: toolRunResponses,
    toolCallInputTokens: inputTokens,
    toolCallOutputTokens: outputTokens,
@@ -46,6 +46,7 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{
 };
 
 export type RunToolResponse = {
+  error?: any;
  toolDispatchFlowResponses: DispatchFlowResponse[];
  toolCallInputTokens: number;
  toolCallOutputTokens: number;
@@ -329,7 +329,7 @@ export const dispatchRunTool = async (props: RunToolProps): Promise<RunToolRespo
 
     return getNodeErrResponse({
       error,
-      customNodeResponse: {
+      [DispatchNodeResponseKeyEnum.nodeResponse]: {
         toolInput,
         moduleLogo: avatar
       }
@@ -203,6 +203,9 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
         : null
     };
   } catch (error) {
-    return getNodeErrResponse({ error, customNodeResponse: { moduleLogo: plugin?.avatar } });
+    return getNodeErrResponse({
+      error,
+      [DispatchNodeResponseKeyEnum.nodeResponse]: { moduleLogo: plugin?.avatar }
+    });
   }
 };
@@ -25,6 +25,7 @@ import { getMCPChildren } from '../../../core/app/mcp';
 import { getSystemToolRunTimeNodeFromSystemToolset } from '../utils';
 import type { localeType } from '@fastgpt/global/common/i18n/type';
 import type { HttpToolConfigType } from '@fastgpt/global/core/app/type';
+import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
 
 export const getWorkflowResponseWrite = ({
   res,
@@ -293,22 +294,34 @@ export const rewriteRuntimeWorkFlow = async ({
 export const getNodeErrResponse = ({
   error,
   customErr,
-  customNodeResponse
+  responseData,
+  nodeDispatchUsages,
+  runTimes,
+  newVariables,
+  system_memories
 }: {
   error: any;
   customErr?: Record<string, any>;
-  customNodeResponse?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.nodeResponse]?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.nodeDispatchUsages]?: ChatNodeUsageType[]; // Node total usage
+  [DispatchNodeResponseKeyEnum.runTimes]?: number;
+  [DispatchNodeResponseKeyEnum.newVariables]?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.memories]?: Record<string, any>;
 }) => {
   const errorText = getErrText(error);
 
   return {
+    [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: nodeDispatchUsages,
+    [DispatchNodeResponseKeyEnum.runTimes]: runTimes,
+    [DispatchNodeResponseKeyEnum.newVariables]: newVariables,
+    [DispatchNodeResponseKeyEnum.memories]: system_memories,
     error: {
       [NodeOutputKeyEnum.errorText]: errorText,
       ...(typeof customErr === 'object' ? customErr : {})
     },
     [DispatchNodeResponseKeyEnum.nodeResponse]: {
       errorText,
-      ...(typeof customNodeResponse === 'object' ? customNodeResponse : {})
+      ...(typeof responseData === 'object' ? responseData : {})
     },
     [DispatchNodeResponseKeyEnum.toolResponses]: {
       error: errorText,
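Because the helper now accepts the dispatch-response keys directly, an error exit can still report usage and points. A minimal sketch of a caller, mirroring the dispatchChatCompletion hunk above (the surrounding variables are assumed to be in scope):

  return getNodeErrResponse({
    error,
    responseData: {
      totalPoints: points,
      model: modelName,
      finishReason: finish_reason
    },
    nodeDispatchUsages: [
      { moduleName: name, totalPoints: points, model: modelName, inputTokens: 0, outputTokens: 0 }
    ]
  });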
@@ -19,6 +19,7 @@
   "click_to_add_url": "Enter file link",
   "completion_finish_close": "Disconnection",
   "completion_finish_content_filter": "Trigger safe wind control",
+  "completion_finish_error": "Request error",
   "completion_finish_function_call": "Function Calls",
   "completion_finish_length": "Reply limit exceeded",
   "completion_finish_null": "unknown",

@@ -19,6 +19,7 @@
   "click_to_add_url": "输入文件链接",
   "completion_finish_close": "请求关闭",
   "completion_finish_content_filter": "触发安全风控",
+  "completion_finish_error": "请求错误",
   "completion_finish_function_call": "函数调用",
   "completion_finish_length": "超出回复限制",
   "completion_finish_null": "未知",

@@ -19,6 +19,7 @@
   "click_to_add_url": "輸入文件鏈接",
   "completion_finish_close": "連接斷開",
   "completion_finish_content_filter": "觸發安全風控",
+  "completion_finish_error": "請求錯誤",
   "completion_finish_function_call": "函式呼叫",
   "completion_finish_length": "超出回覆限制",
   "completion_finish_null": "未知",