perf: request llm (#6191)

* perf: request error info

* perf: request llm

* openapi doc
Author: Archer
Date: 2026-01-06 13:21:57 +08:00 (committed by GitHub)
Parent: f7e46ec760
Commit: 9f2adcd523
26 changed files with 425 additions and 254 deletions
@@ -177,47 +177,55 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
const write = res ? responseWriteController({ res, readStream: stream }) : undefined;
-      const { completeMessages, reasoningText, answerText, finish_reason, responseEmptyTip, usage } =
-        await createLLMResponse({
-          body: {
-            model: modelConstantsData.model,
-            stream,
-            messages: filterMessages,
-            temperature,
-            max_tokens,
-            top_p: aiChatTopP,
-            stop: aiChatStopSign,
-            response_format: {
-              type: aiChatResponseFormat,
-              json_schema: aiChatJsonSchema
-            },
-            retainDatasetCite,
-            useVision: aiChatVision,
-            requestOrigin
-          },
-          userKey: externalProvider.openaiAccount,
-          isAborted: checkIsStopping,
-          onReasoning({ text }) {
-            if (!aiChatReasoning) return;
-            workflowStreamResponse?.({
-              write,
-              event: SseResponseEventEnum.answer,
-              data: textAdaptGptResponse({
-                reasoning_content: text
-              })
-            });
-          },
-          onStreaming({ text }) {
-            if (!isResponseAnswerText) return;
-            workflowStreamResponse?.({
-              write,
-              event: SseResponseEventEnum.answer,
-              data: textAdaptGptResponse({
-                text
-              })
-            });
-          }
-        });
+      const {
+        completeMessages,
+        reasoningText,
+        answerText,
+        finish_reason,
+        responseEmptyTip,
+        usage,
+        error
+      } = await createLLMResponse({
+        throwError: false,
+        body: {
+          model: modelConstantsData.model,
+          stream,
+          messages: filterMessages,
+          temperature,
+          max_tokens,
+          top_p: aiChatTopP,
+          stop: aiChatStopSign,
+          response_format: {
+            type: aiChatResponseFormat,
+            json_schema: aiChatJsonSchema
+          },
+          retainDatasetCite,
+          useVision: aiChatVision,
+          requestOrigin
+        },
+        userKey: externalProvider.openaiAccount,
+        isAborted: checkIsStopping,
+        onReasoning({ text }) {
+          if (!aiChatReasoning) return;
+          workflowStreamResponse?.({
+            write,
+            event: SseResponseEventEnum.answer,
+            data: textAdaptGptResponse({
+              reasoning_content: text
+            })
+          });
+        },
+        onStreaming({ text }) {
+          if (!isResponseAnswerText) return;
+          workflowStreamResponse?.({
+            write,
+            event: SseResponseEventEnum.answer,
+            data: textAdaptGptResponse({
+              text
+            })
+          });
+        }
+      });
if (responseEmptyTip) {
return getNodeErrResponse({ error: responseEmptyTip });
@@ -232,6 +240,35 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
const chatCompleteMessages = GPTMessages2Chats({ messages: completeMessages });
+    if (error) {
+      return getNodeErrResponse({
+        error,
+        responseData: {
+          totalPoints: points,
+          model: modelName,
+          inputTokens: usage.inputTokens,
+          outputTokens: usage.outputTokens,
+          query: `${userChatInput}`,
+          maxToken: max_tokens,
+          reasoningText,
+          historyPreview: getHistoryPreview(chatCompleteMessages, 10000, aiChatVision),
+          contextTotalLen: completeMessages.length,
+          finishReason: finish_reason
+        },
+        ...(points && {
+          [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
+            {
+              moduleName: name,
+              totalPoints: points,
+              model: modelName,
+              inputTokens: usage.inputTokens,
+              outputTokens: usage.outputTokens
+            }
+          ]
+        })
+      });
+    }
return {
data: {
answerText: answerText,
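The hunks above are the heart of the change: `createLLMResponse` is now called with `throwError: false` and surfaces failures through a new `error` field instead of rejecting, so the dispatcher keeps the usage it already accumulated. A minimal sketch of that contract in TypeScript — the field names follow the destructuring above, but the concrete types are assumptions, not the repository's definitions:

// Sketch of the new non-throwing contract. Field names follow the
// destructuring above; the concrete types here are assumptions.
type LLMUsage = { inputTokens: number; outputTokens: number };

type LLMResponseResult = {
  completeMessages: unknown[];
  reasoningText: string;
  answerText: string;
  finish_reason: string | null;
  responseEmptyTip?: string;
  usage: LLMUsage;
  error?: unknown; // set instead of throwing when throwError is false
};

async function createLLMResponseSketch(args: {
  throwError?: boolean; // previous behaviour: always reject on failure
  body: Record<string, unknown>;
}): Promise<LLMResponseResult> {
  const empty: LLMResponseResult = {
    completeMessages: [],
    reasoningText: '',
    answerText: '',
    finish_reason: null,
    usage: { inputTokens: 0, outputTokens: 0 }
  };
  try {
    // ...the actual LLM request would run here...
    return empty;
  } catch (error) {
    if (args.throwError !== false) throw error;
    // Non-throwing mode: hand the error back with whatever usage exists,
    // so the caller can still bill tokens and build a node error response.
    return { ...empty, error };
  }
}

The caller then branches on the returned `error` instead of wrapping the request in try/catch, which is exactly what the new `if (error)` blocks in this commit do.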
@@ -14,7 +14,6 @@ import { formatModelChars2Points } from '../../../../support/wallet/usage/utils'
import { type DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
import { getHandleId } from '@fastgpt/global/core/workflow/utils';
import { addLog } from '../../../../common/system/log';
-import { ModelTypeEnum } from '../../../../../global/core/ai/model';
import { createLLMResponse } from '../../../ai/llm/request';
type Props = ModuleDispatchProps<{
@@ -187,7 +187,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
toolCallOutputTokens,
completeMessages = [], // The actual message sent to AI(just save text)
assistantResponses = [], // FastGPT system store assistant.value response
-    finish_reason
+    finish_reason,
+    error
} = await (async () => {
const adaptMessages = chats2GPTMessages({
messages,
@@ -224,6 +225,46 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
// Preview assistant responses
const previewAssistantResponses = filterToolResponseToPreview(assistantResponses);
+  if (error) {
+    return getNodeErrResponse({
+      error,
+      [DispatchNodeResponseKeyEnum.nodeResponse]: {
+        totalPoints: totalPointsUsage,
+        toolCallInputTokens: toolCallInputTokens,
+        toolCallOutputTokens: toolCallOutputTokens,
+        childTotalPoints: toolTotalPoints,
+        model: modelName,
+        query: userChatInput,
+        historyPreview: getHistoryPreview(
+          GPTMessages2Chats({ messages: completeMessages, reserveTool: false }),
+          10000,
+          useVision
+        ),
+        toolDetail: toolDispatchFlowResponses.map((item) => item.flowResponses).flat(),
+        mergeSignId: nodeId,
+        finishReason: finish_reason
+      },
+      [DispatchNodeResponseKeyEnum.runTimes]: toolDispatchFlowResponses.reduce(
+        (sum, item) => sum + item.runTimes,
+        0
+      ),
+      ...(totalPointsUsage && {
+        [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
+          // Points consumed by the model itself
+          {
+            moduleName: name,
+            model: modelName,
+            totalPoints: modelUsage,
+            inputTokens: toolCallInputTokens,
+            outputTokens: toolCallOutputTokens
+          },
+          // Points consumed by the tools
+          ...toolUsages
+        ]
+      })
+    });
+  }
return {
data: {
[NodeOutputKeyEnum.answerText]: previewAssistantResponses
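Both error branches above attach usage entries with the same conditional-spread idiom, so a zero-point run records no usage row at all. A standalone illustration of the idiom (hypothetical values; `nodeDispatchUsages` is the only name taken from the diff):

// Hypothetical values to show the conditional-spread behaviour.
const points: number = 0;
const payload = {
  error: 'request failed',
  // Spreading `points && {...}`: when points is 0 the spread operand is the
  // number 0, which contributes no properties, so the key is simply absent.
  ...(points && {
    nodeDispatchUsages: [{ moduleName: 'AI Chat', totalPoints: points }]
  })
};
console.log('nodeDispatchUsages' in payload); // false — no zero-cost usage row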
@@ -110,7 +110,8 @@ export const runToolCall = async (props: DispatchToolModuleProps): Promise<RunTo
completeMessages,
assistantMessages,
interactiveResponse,
-    finish_reason
+    finish_reason,
+    error
} = await runAgentCall({
maxRunAgentTimes: 50,
body: {
@@ -310,6 +311,7 @@ export const runToolCall = async (props: DispatchToolModuleProps): Promise<RunTo
.flat();
return {
+    error,
toolDispatchFlowResponses: toolRunResponses,
toolCallInputTokens: inputTokens,
toolCallOutputTokens: outputTokens,
@@ -46,6 +46,7 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{
};
export type RunToolResponse = {
+  error?: any;
toolDispatchFlowResponses: DispatchFlowResponse[];
toolCallInputTokens: number;
toolCallOutputTokens: number;
@@ -329,7 +329,7 @@ export const dispatchRunTool = async (props: RunToolProps): Promise<RunToolRespo
return getNodeErrResponse({
error,
-      customNodeResponse: {
+      [DispatchNodeResponseKeyEnum.nodeResponse]: {
toolInput,
moduleLogo: avatar
}
@@ -203,6 +203,9 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
: null
};
} catch (error) {
-    return getNodeErrResponse({ error, customNodeResponse: { moduleLogo: plugin?.avatar } });
+    return getNodeErrResponse({
+      error,
+      [DispatchNodeResponseKeyEnum.nodeResponse]: { moduleLogo: plugin?.avatar }
+    });
}
};
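The two hunks above are a key rename: callers now pass the computed enum key rather than a bespoke `customNodeResponse` option. A sketch of why the computed key and the plain `responseData` parameter meet in the middle, assuming the enum values shown here (they are inferred from the destructured parameter names in the `getNodeErrResponse` hunk below, not quoted from the source):

// Assumed enum values: each must equal the parameter name that the new
// getNodeErrResponse destructures, or the TypeScript would not line up.
enum DispatchNodeResponseKeyEnum {
  nodeResponse = 'responseData',
  nodeDispatchUsages = 'nodeDispatchUsages',
  runTimes = 'runTimes',
  newVariables = 'newVariables',
  memories = 'system_memories'
}

const args = {
  error: new Error('tool failed'),
  [DispatchNodeResponseKeyEnum.nodeResponse]: { moduleLogo: 'avatar.png' }
};

// The computed key and the plain key address the same property at runtime:
console.log(args.responseData); // { moduleLogo: 'avatar.png' }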
@@ -25,6 +25,7 @@ import { getMCPChildren } from '../../../core/app/mcp';
import { getSystemToolRunTimeNodeFromSystemToolset } from '../utils';
import type { localeType } from '@fastgpt/global/common/i18n/type';
import type { HttpToolConfigType } from '@fastgpt/global/core/app/type';
+import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
export const getWorkflowResponseWrite = ({
res,
@@ -293,22 +294,34 @@ export const rewriteRuntimeWorkFlow = async ({
export const getNodeErrResponse = ({
error,
customErr,
-  customNodeResponse
+  responseData,
+  nodeDispatchUsages,
+  runTimes,
+  newVariables,
+  system_memories
}: {
error: any;
customErr?: Record<string, any>;
-  customNodeResponse?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.nodeResponse]?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.nodeDispatchUsages]?: ChatNodeUsageType[]; // Node total usage
+  [DispatchNodeResponseKeyEnum.runTimes]?: number;
+  [DispatchNodeResponseKeyEnum.newVariables]?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.memories]?: Record<string, any>;
}) => {
const errorText = getErrText(error);
return {
+    [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: nodeDispatchUsages,
+    [DispatchNodeResponseKeyEnum.runTimes]: runTimes,
+    [DispatchNodeResponseKeyEnum.newVariables]: newVariables,
+    [DispatchNodeResponseKeyEnum.memories]: system_memories,
error: {
[NodeOutputKeyEnum.errorText]: errorText,
...(typeof customErr === 'object' ? customErr : {})
},
[DispatchNodeResponseKeyEnum.nodeResponse]: {
errorText,
-      ...(typeof customNodeResponse === 'object' ? customNodeResponse : {})
+      ...(typeof responseData === 'object' ? responseData : {})
},
[DispatchNodeResponseKeyEnum.toolResponses]: {
error: errorText,
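Taken together, the widened signature lets a failing node hand back billing and telemetry in a single call rather than losing them to a thrown exception. A hypothetical invocation, using only keys declared in this diff (the values and the `ChatNodeUsageType` fields shown are illustrative, and this fragment assumes the real `getNodeErrResponse` above is in scope):

// Hypothetical caller inside a node dispatcher; every key below appears in
// the widened getNodeErrResponse signature, values are illustrative only.
const errResponse = getNodeErrResponse({
  error: new Error('LLM request failed'),
  responseData: {
    totalPoints: 12,
    model: 'gpt-4o-mini',
    inputTokens: 1024,
    outputTokens: 0,
    finishReason: 'error'
  },
  nodeDispatchUsages: [
    {
      moduleName: 'AI Chat',
      totalPoints: 12,
      model: 'gpt-4o-mini',
      inputTokens: 1024,
      outputTokens: 0
    }
  ],
  runTimes: 1
});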