Mirror of https://github.com/labring/FastGPT.git (synced 2026-05-05 01:02:59 +08:00)

perf: request llm (#6191)

* perf: request error info
* perf: request llm
* perf: request llm
* openapi doc
@@ -177,47 +177,55 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
   const write = res ? responseWriteController({ res, readStream: stream }) : undefined;
 
-  const { completeMessages, reasoningText, answerText, finish_reason, responseEmptyTip, usage } =
-    await createLLMResponse({
-      body: {
-        model: modelConstantsData.model,
-        stream,
-        messages: filterMessages,
-        temperature,
-        max_tokens,
-        top_p: aiChatTopP,
-        stop: aiChatStopSign,
-        response_format: {
-          type: aiChatResponseFormat,
-          json_schema: aiChatJsonSchema
-        },
-        retainDatasetCite,
-        useVision: aiChatVision,
-        requestOrigin
-      },
-      userKey: externalProvider.openaiAccount,
-      isAborted: checkIsStopping,
-      onReasoning({ text }) {
-        if (!aiChatReasoning) return;
-        workflowStreamResponse?.({
-          write,
-          event: SseResponseEventEnum.answer,
-          data: textAdaptGptResponse({
-            reasoning_content: text
-          })
-        });
-      },
-      onStreaming({ text }) {
-        if (!isResponseAnswerText) return;
-        workflowStreamResponse?.({
-          write,
-          event: SseResponseEventEnum.answer,
-          data: textAdaptGptResponse({
-            text
-          })
-        });
-      }
-    });
+  const {
+    completeMessages,
+    reasoningText,
+    answerText,
+    finish_reason,
+    responseEmptyTip,
+    usage,
+    error
+  } = await createLLMResponse({
+    throwError: false,
+    body: {
+      model: modelConstantsData.model,
+      stream,
+      messages: filterMessages,
+      temperature,
+      max_tokens,
+      top_p: aiChatTopP,
+      stop: aiChatStopSign,
+      response_format: {
+        type: aiChatResponseFormat,
+        json_schema: aiChatJsonSchema
+      },
+      retainDatasetCite,
+      useVision: aiChatVision,
+      requestOrigin
+    },
+    userKey: externalProvider.openaiAccount,
+    isAborted: checkIsStopping,
+    onReasoning({ text }) {
+      if (!aiChatReasoning) return;
+      workflowStreamResponse?.({
+        write,
+        event: SseResponseEventEnum.answer,
+        data: textAdaptGptResponse({
+          reasoning_content: text
+        })
+      });
+    },
+    onStreaming({ text }) {
+      if (!isResponseAnswerText) return;
+      workflowStreamResponse?.({
+        write,
+        event: SseResponseEventEnum.answer,
+        data: textAdaptGptResponse({
+          text
+        })
+      });
+    }
+  });
 
   if (responseEmptyTip) {
     return getNodeErrResponse({ error: responseEmptyTip });
   }
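The core change in this hunk: `createLLMResponse` is now invoked with `throwError: false`, so a request failure comes back as an `error` field alongside whatever `usage` was accumulated, instead of as a thrown exception that discards the usage data. A minimal sketch of this error-as-value pattern follows; `LLMResult`, `LLMUsage`, and `runRequest` are illustrative stand-ins, not FastGPT's actual internals:

// Illustrative stand-ins only; not FastGPT's real types.
type LLMUsage = { inputTokens: number; outputTokens: number };

type LLMResult = {
  answerText: string;
  usage: LLMUsage;
  error?: unknown; // set instead of thrown when throwError is false
};

async function createLLMResponseSketch(opts: {
  throwError: boolean;
  runRequest: () => Promise<{ answerText: string; usage: LLMUsage }>;
}): Promise<LLMResult> {
  try {
    const { answerText, usage } = await opts.runRequest();
    return { answerText, usage };
  } catch (error) {
    if (opts.throwError) throw error;
    // Error-as-value: the caller keeps a usable result shape and can
    // still bill and log whatever is known about the failed request.
    return { answerText: '', usage: { inputTokens: 0, outputTokens: 0 }, error };
  }
}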
@@ -232,6 +240,35 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
 
   const chatCompleteMessages = GPTMessages2Chats({ messages: completeMessages });
 
+  if (error) {
+    return getNodeErrResponse({
+      error,
+      responseData: {
+        totalPoints: points,
+        model: modelName,
+        inputTokens: usage.inputTokens,
+        outputTokens: usage.outputTokens,
+        query: `${userChatInput}`,
+        maxToken: max_tokens,
+        reasoningText,
+        historyPreview: getHistoryPreview(chatCompleteMessages, 10000, aiChatVision),
+        contextTotalLen: completeMessages.length,
+        finishReason: finish_reason
+      },
+      ...(points && {
+        [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
+          {
+            moduleName: name,
+            totalPoints: points,
+            model: modelName,
+            inputTokens: usage.inputTokens,
+            outputTokens: usage.outputTokens
+          }
+        ]
+      })
+    });
+  }
+
   return {
     data: {
       answerText: answerText,
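One detail worth noting in this new branch is the `...(points && { ... })` spread: spreading a falsy value (`0`, `false`, `undefined`) into an object literal is a no-op in JavaScript, so the `nodeDispatchUsages` key is only attached when some points were actually consumed. In isolation:

// Conditional spread: a falsy operand contributes no keys at all.
const points: number = 0;
const errResponse = {
  errorText: 'model request failed',
  ...(points && { nodeDispatchUsages: [{ totalPoints: points }] })
};
console.log('nodeDispatchUsages' in errResponse); // false while points === 0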
@@ -14,7 +14,6 @@ import { formatModelChars2Points } from '../../../../support/wallet/usage/utils'
 import { type DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
 import { getHandleId } from '@fastgpt/global/core/workflow/utils';
 import { addLog } from '../../../../common/system/log';
-import { ModelTypeEnum } from '../../../../../global/core/ai/model';
 import { createLLMResponse } from '../../../ai/llm/request';
 
 type Props = ModuleDispatchProps<{
@@ -187,7 +187,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
     toolCallOutputTokens,
     completeMessages = [], // The actual message sent to AI(just save text)
     assistantResponses = [], // FastGPT system store assistant.value response
-    finish_reason
+    finish_reason,
+    error
   } = await (async () => {
     const adaptMessages = chats2GPTMessages({
       messages,
@@ -224,6 +225,46 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
   // Preview assistant responses
   const previewAssistantResponses = filterToolResponseToPreview(assistantResponses);
 
+  if (error) {
+    return getNodeErrResponse({
+      error,
+      [DispatchNodeResponseKeyEnum.nodeResponse]: {
+        totalPoints: totalPointsUsage,
+        toolCallInputTokens: toolCallInputTokens,
+        toolCallOutputTokens: toolCallOutputTokens,
+        childTotalPoints: toolTotalPoints,
+        model: modelName,
+        query: userChatInput,
+        historyPreview: getHistoryPreview(
+          GPTMessages2Chats({ messages: completeMessages, reserveTool: false }),
+          10000,
+          useVision
+        ),
+        toolDetail: toolDispatchFlowResponses.map((item) => item.flowResponses).flat(),
+        mergeSignId: nodeId,
+        finishReason: finish_reason
+      },
+      [DispatchNodeResponseKeyEnum.runTimes]: toolDispatchFlowResponses.reduce(
+        (sum, item) => sum + item.runTimes,
+        0
+      ),
+      ...(totalPointsUsage && {
+        [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
+          // Points consumed by the model itself
+          {
+            moduleName: name,
+            model: modelName,
+            totalPoints: modelUsage,
+            inputTokens: toolCallInputTokens,
+            outputTokens: toolCallOutputTokens
+          },
+          // Points consumed by the tools
+          ...toolUsages
+        ]
+      })
+    });
+  }
+
   return {
     data: {
       [NodeOutputKeyEnum.answerText]: previewAssistantResponses
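Unlike the plain chat node, the agent variant also has to account for sub-flow executions: `runTimes` is summed across every tool sub-flow with a `reduce`, and the tools' own usage entries are appended after the model's entry. The accumulation in isolation (hypothetical data):

// Summing run counts across tool sub-flow responses, as in the hunk above.
const toolDispatchFlowResponses = [{ runTimes: 1 }, { runTimes: 3 }, { runTimes: 2 }];
const runTimes = toolDispatchFlowResponses.reduce((sum, item) => sum + item.runTimes, 0);
console.log(runTimes); // 6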
@@ -110,7 +110,8 @@ export const runToolCall = async (props: DispatchToolModuleProps): Promise<RunTo
     completeMessages,
     assistantMessages,
     interactiveResponse,
-    finish_reason
+    finish_reason,
+    error
   } = await runAgentCall({
     maxRunAgentTimes: 50,
     body: {
@@ -310,6 +311,7 @@ export const runToolCall = async (props: DispatchToolModuleProps): Promise<RunTo
     .flat();
 
   return {
+    error,
     toolDispatchFlowResponses: toolRunResponses,
     toolCallInputTokens: inputTokens,
     toolCallOutputTokens: outputTokens,
@@ -46,6 +46,7 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{
 };
 
 export type RunToolResponse = {
+  error?: any;
   toolDispatchFlowResponses: DispatchFlowResponse[];
   toolCallInputTokens: number;
   toolCallOutputTokens: number;
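With `error?: any` added to `RunToolResponse`, a failed agent call becomes representable as ordinary data, and the remaining fields (token counts, flow responses) stay meaningful for billing. A minimal sketch of how a consumer treats the widened shape; the names below are illustrative, not FastGPT's real types:

// Illustrative consumer of the widened response type.
type RunToolResponseSketch = {
  error?: unknown;
  toolCallInputTokens: number;
  toolCallOutputTokens: number;
};

function summarize(res: RunToolResponseSketch): string {
  // The error must be checked first; token counts stay valid either way.
  if (res.error !== undefined) {
    return `failed after ${res.toolCallInputTokens} input tokens`;
  }
  return `ok (${res.toolCallOutputTokens} output tokens)`;
}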
@@ -329,7 +329,7 @@ export const dispatchRunTool = async (props: RunToolProps): Promise<RunToolRespo
 
     return getNodeErrResponse({
       error,
-      customNodeResponse: {
+      [DispatchNodeResponseKeyEnum.nodeResponse]: {
         toolInput,
         moduleLogo: avatar
       }
@@ -203,6 +203,9 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
         : null
     };
   } catch (error) {
-    return getNodeErrResponse({ error, customNodeResponse: { moduleLogo: plugin?.avatar } });
+    return getNodeErrResponse({
+      error,
+      [DispatchNodeResponseKeyEnum.nodeResponse]: { moduleLogo: plugin?.avatar }
+    });
   }
 };
@@ -25,6 +25,7 @@ import { getMCPChildren } from '../../../core/app/mcp';
 import { getSystemToolRunTimeNodeFromSystemToolset } from '../utils';
 import type { localeType } from '@fastgpt/global/common/i18n/type';
 import type { HttpToolConfigType } from '@fastgpt/global/core/app/type';
+import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
 
 export const getWorkflowResponseWrite = ({
   res,
@@ -293,22 +294,34 @@ export const rewriteRuntimeWorkFlow = async ({
 export const getNodeErrResponse = ({
   error,
   customErr,
-  customNodeResponse
+  responseData,
+  nodeDispatchUsages,
+  runTimes,
+  newVariables,
+  system_memories
 }: {
   error: any;
   customErr?: Record<string, any>;
-  customNodeResponse?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.nodeResponse]?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.nodeDispatchUsages]?: ChatNodeUsageType[]; // Node total usage
+  [DispatchNodeResponseKeyEnum.runTimes]?: number;
+  [DispatchNodeResponseKeyEnum.newVariables]?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.memories]?: Record<string, any>;
 }) => {
   const errorText = getErrText(error);
 
   return {
+    [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: nodeDispatchUsages,
+    [DispatchNodeResponseKeyEnum.runTimes]: runTimes,
+    [DispatchNodeResponseKeyEnum.newVariables]: newVariables,
+    [DispatchNodeResponseKeyEnum.memories]: system_memories,
     error: {
       [NodeOutputKeyEnum.errorText]: errorText,
       ...(typeof customErr === 'object' ? customErr : {})
     },
     [DispatchNodeResponseKeyEnum.nodeResponse]: {
       errorText,
-      ...(typeof customNodeResponse === 'object' ? customNodeResponse : {})
+      ...(typeof responseData === 'object' ? responseData : {})
     },
     [DispatchNodeResponseKeyEnum.toolResponses]: {
       error: errorText,
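The widened signature works because each `DispatchNodeResponseKeyEnum` member's string value lines up with the plain name used in the destructuring above (the diff implies `nodeResponse` is 'responseData' and `memories` is 'system_memories'), so callers can pass either spelling of the same key. The same pattern in miniature, with a stand-in enum rather than FastGPT's real one:

// Stand-in enum; values mirror what the destructuring in the diff implies.
enum RespKey {
  nodeResponse = 'responseData',
  runTimes = 'runTimes'
}

const getErrSketch = ({
  error,
  responseData,
  runTimes
}: {
  error: unknown;
  [RespKey.nodeResponse]?: Record<string, unknown>;
  [RespKey.runTimes]?: number;
}) => ({
  [RespKey.runTimes]: runTimes,
  [RespKey.nodeResponse]: { errorText: String(error), ...responseData }
});

// Both spellings address the same parameter:
getErrSketch({ error: 'boom', responseData: { model: 'demo' } });
getErrSketch({ error: 'boom', [RespKey.nodeResponse]: { model: 'demo' }, runTimes: 2 });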