Mirror of https://github.com/labring/FastGPT.git (synced 2026-05-05 01:02:59 +08:00)

perf: request llm (#6191)

* perf: request error info
* perf: request llm
* perf: request llm
* openapi doc
@@ -177,47 +177,55 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
   const write = res ? responseWriteController({ res, readStream: stream }) : undefined;
 
-  const { completeMessages, reasoningText, answerText, finish_reason, responseEmptyTip, usage } =
-    await createLLMResponse({
-      body: {
-        model: modelConstantsData.model,
-        stream,
-        messages: filterMessages,
-        temperature,
-        max_tokens,
-        top_p: aiChatTopP,
-        stop: aiChatStopSign,
-        response_format: {
-          type: aiChatResponseFormat,
-          json_schema: aiChatJsonSchema
-        },
-        retainDatasetCite,
-        useVision: aiChatVision,
-        requestOrigin
-      },
-      userKey: externalProvider.openaiAccount,
-      isAborted: checkIsStopping,
-      onReasoning({ text }) {
-        if (!aiChatReasoning) return;
-        workflowStreamResponse?.({
-          write,
-          event: SseResponseEventEnum.answer,
-          data: textAdaptGptResponse({
-            reasoning_content: text
-          })
-        });
-      },
-      onStreaming({ text }) {
-        if (!isResponseAnswerText) return;
-        workflowStreamResponse?.({
-          write,
-          event: SseResponseEventEnum.answer,
-          data: textAdaptGptResponse({
-            text
-          })
-        });
-      }
-    });
+  const {
+    completeMessages,
+    reasoningText,
+    answerText,
+    finish_reason,
+    responseEmptyTip,
+    usage,
+    error
+  } = await createLLMResponse({
+    throwError: false,
+    body: {
+      model: modelConstantsData.model,
+      stream,
+      messages: filterMessages,
+      temperature,
+      max_tokens,
+      top_p: aiChatTopP,
+      stop: aiChatStopSign,
+      response_format: {
+        type: aiChatResponseFormat,
+        json_schema: aiChatJsonSchema
+      },
+      retainDatasetCite,
+      useVision: aiChatVision,
+      requestOrigin
+    },
+    userKey: externalProvider.openaiAccount,
+    isAborted: checkIsStopping,
+    onReasoning({ text }) {
+      if (!aiChatReasoning) return;
+      workflowStreamResponse?.({
+        write,
+        event: SseResponseEventEnum.answer,
+        data: textAdaptGptResponse({
+          reasoning_content: text
+        })
+      });
+    },
+    onStreaming({ text }) {
+      if (!isResponseAnswerText) return;
+      workflowStreamResponse?.({
+        write,
+        event: SseResponseEventEnum.answer,
+        data: textAdaptGptResponse({
+          text
+        })
+      });
+    }
+  });
 
   if (responseEmptyTip) {
     return getNodeErrResponse({ error: responseEmptyTip });
   }
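The core change in this hunk: `createLLMResponse` is now invoked with `throwError: false`, so a request failure comes back as an `error` field alongside whatever `usage` was accumulated, instead of as a thrown exception that discards the usage data. A minimal sketch of this error-as-value pattern follows; `LLMResult`, `LLMUsage`, and `runRequest` are illustrative stand-ins, not FastGPT's actual internals:

// Illustrative stand-ins only; not FastGPT's real types.
type LLMUsage = { inputTokens: number; outputTokens: number };

type LLMResult = {
  answerText: string;
  usage: LLMUsage;
  error?: unknown; // set instead of thrown when throwError is false
};

async function createLLMResponseSketch(opts: {
  throwError: boolean;
  runRequest: () => Promise<{ answerText: string; usage: LLMUsage }>;
}): Promise<LLMResult> {
  try {
    const { answerText, usage } = await opts.runRequest();
    return { answerText, usage };
  } catch (error) {
    if (opts.throwError) throw error;
    // Error-as-value: the caller keeps a usable result shape and can
    // still bill and log whatever is known about the failed request.
    return { answerText: '', usage: { inputTokens: 0, outputTokens: 0 }, error };
  }
}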
@@ -232,6 +240,35 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
 
   const chatCompleteMessages = GPTMessages2Chats({ messages: completeMessages });
 
+  if (error) {
+    return getNodeErrResponse({
+      error,
+      responseData: {
+        totalPoints: points,
+        model: modelName,
+        inputTokens: usage.inputTokens,
+        outputTokens: usage.outputTokens,
+        query: `${userChatInput}`,
+        maxToken: max_tokens,
+        reasoningText,
+        historyPreview: getHistoryPreview(chatCompleteMessages, 10000, aiChatVision),
+        contextTotalLen: completeMessages.length,
+        finishReason: finish_reason
+      },
+      ...(points && {
+        [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
+          {
+            moduleName: name,
+            totalPoints: points,
+            model: modelName,
+            inputTokens: usage.inputTokens,
+            outputTokens: usage.outputTokens
+          }
+        ]
+      })
+    });
+  }
+
   return {
     data: {
       answerText: answerText,
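One detail worth noting in this new branch is the `...(points && { ... })` spread: spreading a falsy value (`0`, `false`, `undefined`) into an object literal is a no-op in JavaScript, so the `nodeDispatchUsages` key is only attached when some points were actually consumed. In isolation:

// Conditional spread: a falsy operand contributes no keys at all.
const points: number = 0;
const errResponse = {
  errorText: 'model request failed',
  ...(points && { nodeDispatchUsages: [{ totalPoints: points }] })
};
console.log('nodeDispatchUsages' in errResponse); // false while points === 0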
@@ -14,7 +14,6 @@ import { formatModelChars2Points } from '../../../../support/wallet/usage/utils'
 import { type DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
 import { getHandleId } from '@fastgpt/global/core/workflow/utils';
 import { addLog } from '../../../../common/system/log';
-import { ModelTypeEnum } from '../../../../../global/core/ai/model';
 import { createLLMResponse } from '../../../ai/llm/request';
 
 type Props = ModuleDispatchProps<{
@@ -187,7 +187,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
     toolCallOutputTokens,
     completeMessages = [], // The actual message sent to AI(just save text)
     assistantResponses = [], // FastGPT system store assistant.value response
-    finish_reason
+    finish_reason,
+    error
   } = await (async () => {
     const adaptMessages = chats2GPTMessages({
       messages,
@@ -224,6 +225,46 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
   // Preview assistant responses
   const previewAssistantResponses = filterToolResponseToPreview(assistantResponses);
 
+  if (error) {
+    return getNodeErrResponse({
+      error,
+      [DispatchNodeResponseKeyEnum.nodeResponse]: {
+        totalPoints: totalPointsUsage,
+        toolCallInputTokens: toolCallInputTokens,
+        toolCallOutputTokens: toolCallOutputTokens,
+        childTotalPoints: toolTotalPoints,
+        model: modelName,
+        query: userChatInput,
+        historyPreview: getHistoryPreview(
+          GPTMessages2Chats({ messages: completeMessages, reserveTool: false }),
+          10000,
+          useVision
+        ),
+        toolDetail: toolDispatchFlowResponses.map((item) => item.flowResponses).flat(),
+        mergeSignId: nodeId,
+        finishReason: finish_reason
+      },
+      [DispatchNodeResponseKeyEnum.runTimes]: toolDispatchFlowResponses.reduce(
+        (sum, item) => sum + item.runTimes,
+        0
+      ),
+      ...(totalPointsUsage && {
+        [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
+          // Points consumed by the model itself
+          {
+            moduleName: name,
+            model: modelName,
+            totalPoints: modelUsage,
+            inputTokens: toolCallInputTokens,
+            outputTokens: toolCallOutputTokens
+          },
+          // Points consumed by the tools
+          ...toolUsages
+        ]
+      })
+    });
+  }
+
   return {
     data: {
       [NodeOutputKeyEnum.answerText]: previewAssistantResponses
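Unlike the plain chat node, the agent variant also has to account for sub-flow executions: `runTimes` is summed across every tool sub-flow with a `reduce`, and the tools' own usage entries are appended after the model's entry. The accumulation in isolation (hypothetical data):

// Summing run counts across tool sub-flow responses, as in the hunk above.
const toolDispatchFlowResponses = [{ runTimes: 1 }, { runTimes: 3 }, { runTimes: 2 }];
const runTimes = toolDispatchFlowResponses.reduce((sum, item) => sum + item.runTimes, 0);
console.log(runTimes); // 6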
@@ -110,7 +110,8 @@ export const runToolCall = async (props: DispatchToolModuleProps): Promise<RunTo
     completeMessages,
     assistantMessages,
     interactiveResponse,
-    finish_reason
+    finish_reason,
+    error
   } = await runAgentCall({
     maxRunAgentTimes: 50,
     body: {
@@ -310,6 +311,7 @@ export const runToolCall = async (props: DispatchToolModuleProps): Promise<RunTo
     .flat();
 
   return {
+    error,
     toolDispatchFlowResponses: toolRunResponses,
     toolCallInputTokens: inputTokens,
     toolCallOutputTokens: outputTokens,
@@ -46,6 +46,7 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{
 };
 
 export type RunToolResponse = {
+  error?: any;
   toolDispatchFlowResponses: DispatchFlowResponse[];
   toolCallInputTokens: number;
   toolCallOutputTokens: number;
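With `error?: any` added to `RunToolResponse`, a failed agent call becomes representable as ordinary data, and the remaining fields (token counts, flow responses) stay meaningful for billing. A minimal sketch of how a consumer treats the widened shape; the names below are illustrative, not FastGPT's real types:

// Illustrative consumer of the widened response type.
type RunToolResponseSketch = {
  error?: unknown;
  toolCallInputTokens: number;
  toolCallOutputTokens: number;
};

function summarize(res: RunToolResponseSketch): string {
  // The error must be checked first; token counts stay valid either way.
  if (res.error !== undefined) {
    return `failed after ${res.toolCallInputTokens} input tokens`;
  }
  return `ok (${res.toolCallOutputTokens} output tokens)`;
}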
@@ -329,7 +329,7 @@ export const dispatchRunTool = async (props: RunToolProps): Promise<RunToolRespo
 
     return getNodeErrResponse({
       error,
-      customNodeResponse: {
+      [DispatchNodeResponseKeyEnum.nodeResponse]: {
         toolInput,
         moduleLogo: avatar
       }
@@ -203,6 +203,9 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
         : null
     };
   } catch (error) {
-    return getNodeErrResponse({ error, customNodeResponse: { moduleLogo: plugin?.avatar } });
+    return getNodeErrResponse({
+      error,
+      [DispatchNodeResponseKeyEnum.nodeResponse]: { moduleLogo: plugin?.avatar }
+    });
   }
 };
@@ -25,6 +25,7 @@ import { getMCPChildren } from '../../../core/app/mcp';
 import { getSystemToolRunTimeNodeFromSystemToolset } from '../utils';
 import type { localeType } from '@fastgpt/global/common/i18n/type';
 import type { HttpToolConfigType } from '@fastgpt/global/core/app/type';
+import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
 
 export const getWorkflowResponseWrite = ({
   res,
@@ -293,22 +294,34 @@ export const rewriteRuntimeWorkFlow = async ({
 export const getNodeErrResponse = ({
   error,
   customErr,
-  customNodeResponse
+  responseData,
+  nodeDispatchUsages,
+  runTimes,
+  newVariables,
+  system_memories
 }: {
   error: any;
   customErr?: Record<string, any>;
-  customNodeResponse?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.nodeResponse]?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.nodeDispatchUsages]?: ChatNodeUsageType[]; // Node total usage
+  [DispatchNodeResponseKeyEnum.runTimes]?: number;
+  [DispatchNodeResponseKeyEnum.newVariables]?: Record<string, any>;
+  [DispatchNodeResponseKeyEnum.memories]?: Record<string, any>;
 }) => {
   const errorText = getErrText(error);
 
   return {
+    [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: nodeDispatchUsages,
+    [DispatchNodeResponseKeyEnum.runTimes]: runTimes,
+    [DispatchNodeResponseKeyEnum.newVariables]: newVariables,
+    [DispatchNodeResponseKeyEnum.memories]: system_memories,
     error: {
       [NodeOutputKeyEnum.errorText]: errorText,
       ...(typeof customErr === 'object' ? customErr : {})
     },
     [DispatchNodeResponseKeyEnum.nodeResponse]: {
       errorText,
-      ...(typeof customNodeResponse === 'object' ? customNodeResponse : {})
+      ...(typeof responseData === 'object' ? responseData : {})
     },
     [DispatchNodeResponseKeyEnum.toolResponses]: {
       error: errorText,
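The widened signature works because each `DispatchNodeResponseKeyEnum` member's string value lines up with the plain name used in the destructuring above (the diff implies `nodeResponse` is 'responseData' and `memories` is 'system_memories'), so callers can pass either spelling of the same key. The same pattern in miniature, with a stand-in enum rather than FastGPT's real one:

// Stand-in enum; values mirror what the destructuring in the diff implies.
enum RespKey {
  nodeResponse = 'responseData',
  runTimes = 'runTimes'
}

const getErrSketch = ({
  error,
  responseData,
  runTimes
}: {
  error: unknown;
  [RespKey.nodeResponse]?: Record<string, unknown>;
  [RespKey.runTimes]?: number;
}) => ({
  [RespKey.runTimes]: runTimes,
  [RespKey.nodeResponse]: { errorText: String(error), ...responseData }
});

// Both spellings address the same parameter:
getErrSketch({ error: 'boom', responseData: { model: 'demo' } });
getErrSketch({ error: 'boom', [RespKey.nodeResponse]: { model: 'demo' }, runTimes: 2 });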