Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-22 12:20:34 +00:00)

feat: get tokens from api usage (#4671)
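What this change does: wherever FastGPT previously estimated token counts locally with tiktoken after a chat-completion call, it now prefers the usage object reported by the model provider (for both streaming and non-streaming responses) and falls back to the local tiktoken estimate only when the provider reports nothing. A minimal sketch of the recurring idiom, assembled from the hunks below; the wrapper function itself is hypothetical, while the imported helpers and the fallback expressions are taken from this diff:

import { llmResponseToAnswerText } from '../utils';
import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';

// Hypothetical helper: prefer provider-reported usage, fall back to local estimates.
const resolveTokenUsage = async (
  response: any, // stream or non-stream chat completion from createChatCompletion
  requestMessages: ChatCompletionMessageParam[]
) => {
  const { text: answer, usage } = await llmResponseToAnswerText(response);
  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages));
  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));
  return { answer, inputTokens, outputTokens };
};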
@@ -1,4 +1,13 @@
 import { i18nT } from '../../../web/i18n/utils';
+import type { CompletionUsage } from './type';
+
+export const getLLMDefaultUsage = (): CompletionUsage => {
+  return {
+    prompt_tokens: 0,
+    completion_tokens: 0,
+    total_tokens: 0
+  };
+};
 
 export enum ChatCompletionRequestMessageRoleEnum {
   'System' = 'system',
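getLLMDefaultUsage() above gives the stream handlers a zeroed CompletionUsage to accumulate into. OpenAI-compatible providers that report usage for streamed completions typically attach it to one of the final chunks, so the loop (lifted from the llmStreamResponseToAnswerText change further down) simply keeps the last non-empty value it sees; a sketch, assuming `response` is an async-iterable stream of chat-completion chunks:

import { getLLMDefaultUsage } from '@fastgpt/global/core/ai/constants';

let usage = getLLMDefaultUsage(); // all counters start at 0
let answer = '';
for await (const part of response) {
  usage = part.usage || usage; // keep the provider-reported usage if a chunk carries one
  answer += part.choices?.[0]?.delta?.content || '';
}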
packages/global/core/ai/type.d.ts (vendored, 2 changes)
@@ -10,6 +10,7 @@ import type {
 } from 'openai/resources';
 import { ChatMessageTypeEnum } from './constants';
 import { WorkflowInteractiveResponseType } from '../workflow/template/system/interactive/type';
+import { Stream } from 'openai/streaming';
 export * from 'openai/resources';
 
 // Extension of ChatCompletionMessageParam, Add file url type
@@ -84,6 +85,7 @@ export type CompletionFinishReason =
 
 export default openai;
 export * from 'openai';
+export type { Stream };
 
 // Other
 export type PromptTemplateItem = {
@@ -185,7 +185,6 @@ export const mergeChatResponseData = (
     runningTime: +((lastResponse.runningTime || 0) + (curr.runningTime || 0)).toFixed(2),
     totalPoints: (lastResponse.totalPoints || 0) + (curr.totalPoints || 0),
     childTotalPoints: (lastResponse.childTotalPoints || 0) + (curr.childTotalPoints || 0),
-    toolCallTokens: (lastResponse.toolCallTokens || 0) + (curr.toolCallTokens || 0),
     toolDetail: [...(lastResponse.toolDetail || []), ...(curr.toolDetail || [])],
     loopDetail: [...(lastResponse.loopDetail || []), ...(curr.loopDetail || [])],
     pluginDetail: [...(lastResponse.pluginDetail || []), ...(curr.pluginDetail || [])]
@@ -186,7 +186,6 @@ export type DispatchNodeResponseType = {
   ifElseResult?: string;
 
   // tool
-  toolCallTokens?: number;
   toolCallInputTokens?: number;
   toolCallOutputTokens?: number;
   toolDetail?: ChatHistoryItemResType[];
@@ -1,5 +1,5 @@
 import OpenAI from '@fastgpt/global/core/ai';
-import {
+import type {
   ChatCompletionCreateParamsNonStreaming,
   ChatCompletionCreateParamsStreaming,
   StreamChatType,
@@ -2,7 +2,7 @@ import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d'
 import { createChatCompletion } from '../config';
 import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
 import { loadRequestMessages } from '../../chat/utils';
-import { llmCompletionsBodyFormat } from '../utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils';
 import {
   QuestionGuidePrompt,
   QuestionGuideFooterPrompt
@@ -35,7 +35,7 @@ export async function createQuestionGuide({
     useVision: false
   });
 
-  const { response: data } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         model,
@@ -47,21 +47,20 @@ export async function createQuestionGuide({
       model
     )
   });
 
-  const answer = data.choices?.[0]?.message?.content || '';
+  const { text: answer, usage } = await llmResponseToAnswerText(response);
 
   const start = answer.indexOf('[');
   const end = answer.lastIndexOf(']');
 
-  const inputTokens = await countGptMessagesTokens(requestMessages);
-  const outputTokens = await countPromptTokens(answer);
+  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages));
+  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));
 
   if (start === -1 || end === -1) {
     addLog.warn('Create question guide error', { answer });
     return {
       result: [],
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }
 
@@ -81,8 +80,8 @@ export async function createQuestionGuide({
 
     return {
       result: [],
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }
 }
@@ -4,7 +4,7 @@ import { ChatItemType } from '@fastgpt/global/core/chat/type';
 import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
 import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
 import { getLLMModel } from '../model';
-import { llmCompletionsBodyFormat } from '../utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils';
 import { addLog } from '../../../common/system/log';
 import { filterGPTMessageByMaxContext } from '../../chat/utils';
 import json5 from 'json5';
@@ -167,7 +167,7 @@ assistant: ${chatBg}
     }
   ] as any;
 
-  const { response: result } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         stream: false,
@@ -178,15 +178,17 @@ assistant: ${chatBg}
       modelData
     )
   });
+  const { text: answer, usage } = await llmResponseToAnswerText(response);
+  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(messages));
+  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));
 
-  let answer = result.choices?.[0]?.message?.content || '';
   if (!answer) {
     return {
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens: inputTokens,
+      outputTokens: outputTokens
     };
   }
@@ -200,8 +202,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens: inputTokens,
+      outputTokens: outputTokens
     };
   }
@@ -218,8 +220,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: (Array.isArray(queries) ? queries : []).slice(0, 5),
       model,
-      inputTokens: await countGptMessagesTokens(messages),
-      outputTokens: await countPromptTokens(answer)
+      inputTokens,
+      outputTokens
     };
   } catch (error) {
     addLog.warn('Query extension failed, not a valid JSON', {
@@ -229,8 +231,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }
 };
@@ -3,9 +3,12 @@ import {
   ChatCompletionCreateParamsNonStreaming,
   ChatCompletionCreateParamsStreaming,
   CompletionFinishReason,
-  StreamChatType
+  StreamChatType,
+  UnStreamChatType,
+  CompletionUsage
 } from '@fastgpt/global/core/ai/type';
 import { getLLMModel } from './model';
+import { getLLMDefaultUsage } from '@fastgpt/global/core/ai/constants';
 
 /*
   Count response max token
@@ -97,13 +100,42 @@ export const llmCompletionsBodyFormat = <T extends CompletionsBodyType>(
   return requestBody as unknown as InferCompletionsBody<T>;
 };
 
-export const llmStreamResponseToAnswerText = async (response: StreamChatType) => {
+export const llmStreamResponseToAnswerText = async (
+  response: StreamChatType
+): Promise<{
+  text: string;
+  usage?: CompletionUsage;
+}> => {
   let answer = '';
+  let usage = getLLMDefaultUsage();
   for await (const part of response) {
+    usage = part.usage || usage;
+
     const content = part.choices?.[0]?.delta?.content || '';
     answer += content;
   }
-  return parseReasoningContent(answer)[1];
+  return {
+    text: parseReasoningContent(answer)[1],
+    usage
+  };
+};
+export const llmUnStreamResponseToAnswerText = async (
+  response: UnStreamChatType
+): Promise<{
+  text: string;
+  usage?: CompletionUsage;
+}> => {
+  const answer = response.choices?.[0]?.message?.content || '';
+  return {
+    text: answer,
+    usage: response.usage
+  };
+};
+export const llmResponseToAnswerText = async (response: StreamChatType | UnStreamChatType) => {
+  if ('iterator' in response) {
+    return llmStreamResponseToAnswerText(response);
+  }
+  return llmUnStreamResponseToAnswerText(response);
 };
 
 // Parse <think></think> tags to think and answer - unstream response
@@ -140,7 +172,7 @@ export const parseReasoningStreamContent = () => {
   part: {
     choices: {
       delta: {
-        content?: string;
+        content?: string | null;
         reasoning_content?: string;
       };
       finish_reason?: CompletionFinishReason;
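A hedged usage sketch of the new llmResponseToAnswerText helper above: it works for both streaming and non-streaming results because it checks for the iterator property that openai's Stream wrapper exposes and picks the matching parser. The model, messages, and modelData values here are placeholders; createChatCompletion and llmCompletionsBodyFormat are the existing FastGPT helpers used throughout this diff:

const { response } = await createChatCompletion({
  body: llmCompletionsBodyFormat(
    { model, stream: true, messages }, // placeholder request body
    modelData
  )
});
// text is the concatenated answer; usage is the provider-reported token usage, if any
const { text, usage } = await llmResponseToAnswerText(response);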
@@ -19,7 +19,7 @@ import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/ty
 import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
 import { getHandleId } from '@fastgpt/global/core/workflow/utils';
 import { loadRequestMessages } from '../../../chat/utils';
-import { llmCompletionsBodyFormat } from '../../../ai/utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../../../ai/utils';
 import { addLog } from '../../../../common/system/log';
 import { ModelTypeEnum } from '../../../../../global/core/ai/model';
 import { replaceVariable } from '@fastgpt/global/common/string/tools';
@@ -129,7 +129,7 @@ const completions = async ({
     useVision: false
   });
 
-  const { response: data } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         model: cqModel.model,
@@ -141,7 +141,7 @@ const completions = async ({
     ),
     userKey: externalProvider.openaiAccount
   });
-  const answer = data.choices?.[0].message?.content || '';
+  const { text: answer, usage } = await llmResponseToAnswerText(response);
 
   // console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2));
   // console.log(answer, '----');
@@ -156,8 +156,8 @@ const completions = async ({
   }
 
   return {
-    inputTokens: await countGptMessagesTokens(requestMessages),
-    outputTokens: await countPromptTokens(answer),
+    inputTokens: usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages)),
+    outputTokens: usage?.completion_tokens || (await countPromptTokens(answer)),
     arg: { type: id }
   };
 };
@@ -23,14 +23,14 @@ import { getLLMModel } from '../../../ai/model';
 import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
 import json5 from 'json5';
 import {
   ChatCompletionCreateParams,
   ChatCompletionMessageParam,
-  ChatCompletionTool
+  ChatCompletionTool,
+  UnStreamChatType
 } from '@fastgpt/global/core/ai/type';
 import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
 import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
 import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
-import { llmCompletionsBodyFormat } from '../../../ai/utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../../../ai/utils';
 import { ModelTypeEnum } from '../../../../../global/core/ai/model';
 import {
   getExtractJsonPrompt,
@@ -76,13 +76,6 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
       extractModel
     });
   }
-  if (extractModel.functionCall) {
-    return functionCall({
-      ...props,
-      histories: chatHistories,
-      extractModel
-    });
-  }
   return completions({
     ...props,
     histories: chatHistories,
@@ -233,9 +226,10 @@ const toolChoice = async (props: ActionProps) => {
     }
   ];
 
-  const { response } = await createChatCompletion({
+  const { response } = (await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
+        stream: false,
         model: extractModel.model,
         temperature: 0.01,
         messages: filterMessages,
@@ -245,7 +239,7 @@ const toolChoice = async (props: ActionProps) => {
       extractModel
     ),
     userKey: externalProvider.openaiAccount
-  });
+  })) as { response: UnStreamChatType };
 
   const arg: Record<string, any> = (() => {
     try {
@@ -267,8 +261,9 @@ const toolChoice = async (props: ActionProps) => {
     }
   ];
 
-  const inputTokens = await countGptMessagesTokens(filterMessages, tools);
-  const outputTokens = await countGptMessagesTokens(AIMessages);
+  const usage = response.usage;
+  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(filterMessages, tools));
+  const outputTokens = usage?.completion_tokens || (await countGptMessagesTokens(AIMessages));
   return {
     inputTokens,
     outputTokens,
@@ -276,59 +271,6 @@ const toolChoice = async (props: ActionProps) => {
   };
 };
 
-const functionCall = async (props: ActionProps) => {
-  const { externalProvider, extractModel } = props;
-
-  const { agentFunction, filterMessages } = await getFunctionCallSchema(props);
-  const functions: ChatCompletionCreateParams.Function[] = [agentFunction];
-
-  const { response } = await createChatCompletion({
-    body: llmCompletionsBodyFormat(
-      {
-        model: extractModel.model,
-        temperature: 0.01,
-        messages: filterMessages,
-        function_call: {
-          name: agentFunName
-        },
-        functions
-      },
-      extractModel
-    ),
-    userKey: externalProvider.openaiAccount
-  });
-
-  try {
-    const arg = JSON.parse(response?.choices?.[0]?.message?.function_call?.arguments || '');
-
-    const AIMessages: ChatCompletionMessageParam[] = [
-      {
-        role: ChatCompletionRequestMessageRoleEnum.Assistant,
-        function_call: response.choices?.[0]?.message?.function_call
-      }
-    ];
-
-    const inputTokens = await countGptMessagesTokens(filterMessages, undefined, functions);
-    const outputTokens = await countGptMessagesTokens(AIMessages);
-
-    return {
-      arg,
-      inputTokens,
-      outputTokens
-    };
-  } catch (error) {
-    console.log(response.choices?.[0]?.message);
-
-    console.log('Your model may not support toll_call', error);
-
-    return {
-      arg: {},
-      inputTokens: 0,
-      outputTokens: 0
-    };
-  }
-};
 
 const completions = async ({
   extractModel,
   externalProvider,
@@ -373,7 +315,7 @@ Human: ${content}`
     useVision: false
   });
 
-  const { response: data } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         model: extractModel.model,
@@ -385,7 +327,9 @@ Human: ${content}`
     ),
     userKey: externalProvider.openaiAccount
   });
-  const answer = data.choices?.[0].message?.content || '';
+  const { text: answer, usage } = await llmResponseToAnswerText(response);
+  const inputTokens = usage?.prompt_tokens || (await countMessagesTokens(messages));
+  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));
 
   // parse response
   const jsonStr = sliceJsonStr(answer);
@@ -393,8 +337,8 @@ Human: ${content}`
   if (!jsonStr) {
     return {
       rawResponse: answer,
-      inputTokens: await countMessagesTokens(messages),
-      outputTokens: await countPromptTokens(answer),
+      inputTokens,
+      outputTokens,
       arg: {}
     };
   }
@@ -402,8 +346,8 @@ Human: ${content}`
   try {
     return {
       rawResponse: answer,
-      inputTokens: await countMessagesTokens(messages),
-      outputTokens: await countPromptTokens(answer),
+      inputTokens,
+      outputTokens,
       arg: json5.parse(jsonStr) as Record<string, any>
     };
   } catch (error) {
@@ -411,8 +355,8 @@ Human: ${content}`
     console.log(error);
     return {
       rawResponse: answer,
-      inputTokens: await countMessagesTokens(messages),
-      outputTokens: await countPromptTokens(answer),
+      inputTokens,
+      outputTokens,
       arg: {}
     };
   }
@@ -13,7 +13,10 @@ import { NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
 import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
 import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
-import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
+import {
+  ChatCompletionRequestMessageRoleEnum,
+  getLLMDefaultUsage
+} from '@fastgpt/global/core/ai/constants';
 import { dispatchWorkFlow } from '../../index';
 import { DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d';
 import json5 from 'json5';
@@ -244,17 +247,34 @@ export const runToolWithFunctionCall = async (
       }
     });
 
-    const { answer, functionCalls } = await (async () => {
-      if (res && isStreamResponse) {
-        return streamResponse({
+    let { answer, functionCalls, inputTokens, outputTokens } = await (async () => {
+      if (isStreamResponse) {
+        if (!res || res.closed) {
+          return {
+            answer: '',
+            functionCalls: [],
+            inputTokens: 0,
+            outputTokens: 0
+          };
+        }
+        const result = await streamResponse({
           res,
           toolNodes,
           stream: aiResponse,
           workflowStreamResponse
         });
+
+        return {
+          answer: result.answer,
+          functionCalls: result.functionCalls,
+          inputTokens: result.usage.prompt_tokens,
+          outputTokens: result.usage.completion_tokens
+        };
       } else {
         const result = aiResponse as ChatCompletion;
         const function_call = result.choices?.[0]?.message?.function_call;
+        const usage = result.usage;
 
         const toolNode = toolNodes.find((node) => node.nodeId === function_call?.name);
 
         const toolCalls = function_call
@@ -270,7 +290,9 @@ export const runToolWithFunctionCall = async (
 
         return {
           answer: result.choices?.[0]?.message?.content || '',
-          functionCalls: toolCalls
+          functionCalls: toolCalls,
+          inputTokens: usage?.prompt_tokens,
+          outputTokens: usage?.completion_tokens
         };
       }
     })();
@@ -338,7 +360,7 @@ export const runToolWithFunctionCall = async (
       : flatToolsResponseData;
 
     const functionCall = functionCalls[0];
-    if (functionCall && !res?.closed) {
+    if (functionCall) {
      // Run the tool, combine its results, and perform another round of AI calls
      const assistantToolMsgParams: ChatCompletionAssistantMessageParam = {
        role: ChatCompletionRequestMessageRoleEnum.Assistant,
@@ -356,8 +378,9 @@ export const runToolWithFunctionCall = async (
      ] as ChatCompletionMessageParam[];
      // Only toolCall tokens are counted here, Tool response tokens count towards the next reply
      // const tokens = await countGptMessagesTokens(concatToolMessages, undefined, functions);
-      const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions);
-      const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]);
+      inputTokens =
+        inputTokens || (await countGptMessagesTokens(requestMessages, undefined, functions));
+      outputTokens = outputTokens || (await countGptMessagesTokens([assistantToolMsgParams]));
      /*
        ...
        user
@@ -459,8 +482,9 @@ export const runToolWithFunctionCall = async (
      content: answer
    };
    const completeMessages = filterMessages.concat(gptAssistantResponse);
-    const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions);
-    const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
+    inputTokens =
+      inputTokens || (await countGptMessagesTokens(requestMessages, undefined, functions));
+    outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse]));
    // console.log(tokens, 'response token');
 
    // concat tool assistant
@@ -500,8 +524,10 @@ async function streamResponse({
   let textAnswer = '';
   let functionCalls: ChatCompletionMessageFunctionCall[] = [];
   let functionId = getNanoid();
+  let usage = getLLMDefaultUsage();
 
   for await (const part of stream) {
+    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
       break;
@@ -522,7 +548,7 @@ async function streamResponse({
       });
     } else if (responseChoice.function_call) {
       const functionCall: {
-        arguments: string;
+        arguments?: string;
         name?: string;
       } = responseChoice.function_call;
 
@@ -532,11 +558,9 @@ async function streamResponse({
       const toolNode = toolNodes.find((item) => item.nodeId === functionCall?.name);
 
       if (toolNode) {
-        if (functionCall?.arguments === undefined) {
-          functionCall.arguments = '';
-        }
         functionCalls.push({
           ...functionCall,
+          arguments: functionCall.arguments || '',
           id: functionId,
           name: functionCall.name,
           toolName: toolNode.name,
@@ -552,7 +576,7 @@ async function streamResponse({
             toolName: toolNode.name,
             toolAvatar: toolNode.avatar,
             functionName: functionCall.name,
-            params: functionCall.arguments,
+            params: functionCall.arguments || '',
             response: ''
           }
         }
@@ -585,5 +609,5 @@ async function streamResponse({
     }
   }
 
-  return { answer: textAnswer, functionCalls };
+  return { answer: textAnswer, functionCalls, usage };
 }
@@ -171,7 +171,6 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
   const {
     toolWorkflowInteractiveResponse,
     dispatchFlowResponse, // tool flow response
-    toolNodeTokens,
    toolNodeInputTokens,
    toolNodeOutputTokens,
    completeMessages = [], // The actual message sent to AI(just save text)
@@ -271,7 +270,6 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
      [DispatchNodeResponseKeyEnum.nodeResponse]: {
        // 展示的积分消耗
        totalPoints: totalPointsUsage,
-        toolCallTokens: toolNodeTokens,
        toolCallInputTokens: toolNodeInputTokens,
        toolCallOutputTokens: toolNodeOutputTokens,
        childTotalPoints: flatUsages.reduce((sum, item) => sum + item.totalPoints, 0),
@@ -9,7 +9,10 @@ import { NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
 import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
 import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
-import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
+import {
+  ChatCompletionRequestMessageRoleEnum,
+  getLLMDefaultUsage
+} from '@fastgpt/global/core/ai/constants';
 import { dispatchWorkFlow } from '../../index';
 import { DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d';
 import json5 from 'json5';
@@ -256,9 +259,18 @@ export const runToolWithPromptCall = async (
      }
    });
 
-    const { answer, reasoning, finish_reason } = await (async () => {
-      if (res && isStreamResponse) {
-        const { answer, reasoning, finish_reason } = await streamResponse({
+    let { answer, reasoning, finish_reason, inputTokens, outputTokens } = await (async () => {
+      if (isStreamResponse) {
+        if (!res || res.closed) {
+          return {
+            answer: '',
+            reasoning: '',
+            finish_reason: 'close' as const,
+            inputTokens: 0,
+            outputTokens: 0
+          };
+        }
+        const { answer, reasoning, finish_reason, usage } = await streamResponse({
          res,
          toolNodes,
          stream: aiResponse,
@@ -266,18 +278,28 @@ export const runToolWithPromptCall = async (
          aiChatReasoning
        });
 
-        return { answer, reasoning, finish_reason };
+        return {
+          answer,
+          reasoning,
+          finish_reason,
+          inputTokens: usage.prompt_tokens,
+          outputTokens: usage.completion_tokens
+        };
      } else {
        const finish_reason = aiResponse.choices?.[0]?.finish_reason as CompletionFinishReason;
        const content = aiResponse.choices?.[0]?.message?.content || '';
        // @ts-ignore
        const reasoningContent: string = aiResponse.choices?.[0]?.message?.reasoning_content || '';
+        const usage = aiResponse.usage;
 
        // API already parse reasoning content
        if (reasoningContent || !aiChatReasoning) {
          return {
            answer: content,
            reasoning: reasoningContent,
-            finish_reason
+            finish_reason,
+            inputTokens: usage?.prompt_tokens,
+            outputTokens: usage?.completion_tokens
          };
        }
@@ -285,7 +307,9 @@ export const runToolWithPromptCall = async (
        return {
          answer,
          reasoning: think,
-          finish_reason
+          finish_reason,
+          inputTokens: usage?.prompt_tokens,
+          outputTokens: usage?.completion_tokens
        };
      }
    })();
@@ -336,8 +360,8 @@ export const runToolWithPromptCall = async (
      reasoning_text: undefined
    });
 
-    const inputTokens = await countGptMessagesTokens(requestMessages);
-    const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
+    inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages));
+    outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse]));
 
    // concat tool assistant
    const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType;
@@ -423,8 +447,8 @@ export const runToolWithPromptCall = async (
    };
 
    // Only toolCall tokens are counted here, Tool response tokens count towards the next reply
-    const inputTokens = await countGptMessagesTokens(requestMessages);
-    const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]);
+    inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages));
+    outputTokens = outputTokens || (await countGptMessagesTokens([assistantToolMsgParams]));
 
    /*
        ...
@@ -559,9 +583,12 @@ async function streamResponse({
   let answer = '';
   let reasoning = '';
   let finish_reason: CompletionFinishReason = null;
+  let usage = getLLMDefaultUsage();
 
   const { parsePart, getStartTagBuffer } = parseReasoningStreamContent();
 
   for await (const part of stream) {
+    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
       finish_reason = 'close';
@@ -629,7 +656,7 @@ async function streamResponse({
     }
   }
 
-  return { answer, reasoning, finish_reason };
+  return { answer, reasoning, finish_reason, usage };
 }
 
 const parseAnswer = (
@@ -14,7 +14,10 @@ import { NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
 import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
 import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
-import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
+import {
+  ChatCompletionRequestMessageRoleEnum,
+  getLLMDefaultUsage
+} from '@fastgpt/global/core/ai/constants';
 import { dispatchWorkFlow } from '../../index';
 import { DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d';
 import json5 from 'json5';
@@ -301,19 +304,38 @@ export const runToolWithToolChoice = async (
      }
    });
 
-    const { answer, toolCalls, finish_reason } = await (async () => {
-      if (res && isStreamResponse) {
-        return streamResponse({
+    let { answer, toolCalls, finish_reason, inputTokens, outputTokens } = await (async () => {
+      if (isStreamResponse) {
+        if (!res || res.closed) {
+          return {
+            answer: '',
+            toolCalls: [],
+            finish_reason: 'close' as const,
+            inputTokens: 0,
+            outputTokens: 0
+          };
+        }
+
+        const result = await streamResponse({
          res,
          workflowStreamResponse,
          toolNodes,
          stream: aiResponse
        });
+
+        return {
+          answer: result.answer,
+          toolCalls: result.toolCalls,
+          finish_reason: result.finish_reason,
+          inputTokens: result.usage.prompt_tokens,
+          outputTokens: result.usage.completion_tokens
+        };
      } else {
        const result = aiResponse as ChatCompletion;
        const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
        const calls = result.choices?.[0]?.message?.tool_calls || [];
        const answer = result.choices?.[0]?.message?.content || '';
+        const usage = result.usage;
 
        // 加上name和avatar
        const toolCalls = calls.map((tool) => {
@@ -353,7 +375,9 @@ export const runToolWithToolChoice = async (
        return {
          answer,
          toolCalls: toolCalls,
-          finish_reason
+          finish_reason,
+          inputTokens: usage?.prompt_tokens,
+          outputTokens: usage?.completion_tokens
        };
      }
    })();
@@ -447,7 +471,7 @@ export const runToolWithToolChoice = async (
      ? response.dispatchFlowResponse.concat(flatToolsResponseData)
      : flatToolsResponseData;
 
-    if (toolCalls.length > 0 && !res?.closed) {
+    if (toolCalls.length > 0) {
      // Run the tool, combine its results, and perform another round of AI calls
      const assistantToolMsgParams: ChatCompletionAssistantMessageParam[] = [
        ...(answer
@@ -475,8 +499,8 @@ export const runToolWithToolChoice = async (
      ] as ChatCompletionMessageParam[];
 
      // Only toolCall tokens are counted here, Tool response tokens count towards the next reply
-      const inputTokens = await countGptMessagesTokens(requestMessages, tools);
-      const outputTokens = await countGptMessagesTokens(assistantToolMsgParams);
+      inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools));
+      outputTokens = outputTokens || (await countGptMessagesTokens(assistantToolMsgParams));
 
      /*
        ...
@@ -580,8 +604,8 @@ export const runToolWithToolChoice = async (
      content: answer
    };
    const completeMessages = filterMessages.concat(gptAssistantResponse);
-    const inputTokens = await countGptMessagesTokens(requestMessages, tools);
-    const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
+    inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools));
+    outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse]));
 
    // concat tool assistant
    const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType;
@@ -619,8 +643,10 @@ async function streamResponse({
   let callingTool: { name: string; arguments: string } | null = null;
   let toolCalls: ChatCompletionMessageToolCall[] = [];
   let finishReason: CompletionFinishReason = null;
+  let usage = getLLMDefaultUsage();
 
   for await (const part of stream) {
+    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
       finishReason = 'close';
@@ -644,6 +670,7 @@ async function streamResponse({
       });
     }
     if (responseChoice?.tool_calls?.[0]) {
+      // @ts-ignore
       const toolCall: ChatCompletionMessageToolCall = responseChoice.tool_calls[0];
       // In a stream response, only one tool is returned at a time. If have id, description is executing a tool
       if (toolCall.id || callingTool) {
@@ -715,5 +742,5 @@ async function streamResponse({
     }
   }
 
-  return { answer: textAnswer, toolCalls, finish_reason: finishReason };
+  return { answer: textAnswer, toolCalls, finish_reason: finishReason, usage };
 }
@@ -36,7 +36,6 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{
 
 export type RunToolResponse = {
   dispatchFlowResponse: DispatchFlowResponse[];
-  toolNodeTokens?: number; // deprecated
   toolNodeInputTokens: number;
   toolNodeOutputTokens: number;
   completeMessages?: ChatCompletionMessageParam[];
@@ -9,11 +9,15 @@ import { createChatCompletion } from '../../../ai/config';
 import type {
   ChatCompletionMessageParam,
   CompletionFinishReason,
+  CompletionUsage,
   StreamChatType
 } from '@fastgpt/global/core/ai/type.d';
 import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
 import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
-import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
+import {
+  ChatCompletionRequestMessageRoleEnum,
+  getLLMDefaultUsage
+} from '@fastgpt/global/core/ai/constants';
 import type {
   ChatDispatchProps,
   DispatchNodeResultType
@@ -199,17 +203,19 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
      }
    });
 
-    const { answerText, reasoningText, finish_reason } = await (async () => {
+    let { answerText, reasoningText, finish_reason, inputTokens, outputTokens } = await (async () => {
      if (isStreamResponse) {
-        if (!res) {
+        if (!res || res.closed) {
          return {
            answerText: '',
            reasoningText: '',
-            finish_reason: 'close' as const
+            finish_reason: 'close' as const,
+            inputTokens: 0,
+            outputTokens: 0
          };
        }
        // sse response
-        const { answer, reasoning, finish_reason } = await streamResponse({
+        const { answer, reasoning, finish_reason, usage } = await streamResponse({
          res,
          stream: response,
          aiChatReasoning,
@@ -221,10 +227,13 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
        return {
          answerText: answer,
          reasoningText: reasoning,
-          finish_reason
+          finish_reason,
+          inputTokens: usage?.prompt_tokens,
+          outputTokens: usage?.completion_tokens
        };
      } else {
        const finish_reason = response.choices?.[0]?.finish_reason as CompletionFinishReason;
+        const usage = response.usage;
 
        const { content, reasoningContent } = (() => {
          const content = response.choices?.[0]?.message?.content || '';
@@ -269,7 +278,9 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
        return {
          answerText: content,
          reasoningText: reasoningContent,
-          finish_reason
+          finish_reason,
+          inputTokens: usage?.prompt_tokens,
+          outputTokens: usage?.completion_tokens
        };
      }
    })();
@@ -289,8 +300,8 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
    const completeMessages = [...requestMessages, ...AIMessages];
    const chatCompleteMessages = GPTMessages2Chats(completeMessages);
 
-    const inputTokens = await countGptMessagesTokens(requestMessages);
-    const outputTokens = await countGptMessagesTokens(AIMessages);
+    inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages));
+    outputTokens = outputTokens || (await countGptMessagesTokens(AIMessages));
 
    const { totalPoints, modelName } = formatModelChars2Points({
      model,
@@ -305,7 +316,6 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
      [DispatchNodeResponseKeyEnum.nodeResponse]: {
        totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
        model: modelName,
-        tokens: inputTokens + outputTokens,
        inputTokens: inputTokens,
        outputTokens: outputTokens,
        query: `${userChatInput}`,
@@ -565,9 +575,13 @@ async function streamResponse({
   let answer = '';
   let reasoning = '';
   let finish_reason: CompletionFinishReason = null;
+  let usage: CompletionUsage = getLLMDefaultUsage();
 
   const { parsePart, getStartTagBuffer } = parseReasoningStreamContent();
 
   for await (const part of stream) {
+    usage = part.usage || usage;
+
     if (res.closed) {
       stream.controller?.abort();
       finish_reason = 'close';
@@ -614,5 +628,5 @@ async function streamResponse({
     }
   }
 
-  return { answer, reasoning, finish_reason };
+  return { answer, reasoning, finish_reason, usage };
 }
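Note on the fallback above: answerText, inputTokens, and outputTokens are now destructured with let so the provider-reported numbers are kept when present and backfilled otherwise, and the guard deliberately uses || rather than ?? — a provider that reports usage with 0 tokens is treated the same as one that reports nothing, so the tiktoken estimate still runs in that case:

// Backfill only when the provider did not report usable usage (0 or undefined falls through)
inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages));
outputTokens = outputTokens || (await countGptMessagesTokens(AIMessages));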
@@ -45,6 +45,7 @@
   "insert_input_guide,_some_data_already_exists": "Duplicate data detected, automatically filtered, {{len}} items inserted",
   "is_chatting": "Chatting in progress... please wait until it finishes",
   "items": "Items",
+  "llm_tokens": "LLM tokens",
   "module_runtime_and": "Total Module Runtime",
   "multiple_AI_conversations": "Multiple AI Conversations",
   "new_input_guide_lexicon": "New Lexicon",
@@ -3,7 +3,7 @@
   "Click_to_expand": "Click to expand",
   "Download": "Download",
   "Export": "Export",
-  "FAQ.ai_point_a": "Each time you use the AI model, a certain amount of AI points will be deducted. For detailed calculation standards, please refer to the 'AI Points Calculation Standards' above.\nToken calculation uses the same formula as GPT-3.5, where 1 Token ≈ 0.7 Chinese characters ≈ 0.9 English words. Consecutive characters may be considered as 1 Token.",
+  "FAQ.ai_point_a": "Each time an AI model is called, a certain amount of AI points will be consumed. \nFor specific calculation standards, please refer to the \"AI integral calculation standards\" above. \nThe system will give priority to the actual usage returned by the model manufacturer. If it is empty, the calculation method of GPT3.5 is used for estimation. 1Token≈0.7 Chinese characters ≈0.9 English words, and the characters that appear continuously may be considered as 1 Tokens.",
   "FAQ.ai_point_expire_a": "Yes, they will expire. After the current package expires, the AI points will be reset to the new package's AI points. Annual package AI points are valid for one year, not monthly.",
   "FAQ.ai_point_expire_q": "Do AI points expire?",
   "FAQ.ai_point_q": "What are AI points?",
@@ -490,18 +490,15 @@
   "core.chat.response.module historyPreview": "History Preview (Only Partial Content Displayed)",
   "core.chat.response.module http result": "Response Body",
   "core.chat.response.module if else Result": "Condition Result",
-  "core.chat.response.module input tokens": "input tokens",
   "core.chat.response.module limit": "Single Search Limit",
   "core.chat.response.module maxToken": "Max Response Tokens",
   "core.chat.response.module model": "Model",
   "core.chat.response.module name": "Model Name",
-  "core.chat.response.module output tokens": "output tokens",
   "core.chat.response.module query": "Question/Search Term",
   "core.chat.response.module quoteList": "Quote Content",
   "core.chat.response.module similarity": "Similarity",
   "core.chat.response.module temperature": "Temperature",
   "core.chat.response.module time": "Run Time",
-  "core.chat.response.module tokens": "Total Tokens",
   "core.chat.response.plugin output": "Plugin Output Value",
   "core.chat.response.search using reRank": "Result Re-Rank",
   "core.chat.response.text output": "Text Output",
@@ -45,6 +45,7 @@
   "insert_input_guide,_some_data_already_exists": "有重复数据,已自动过滤,共插入 {{len}} 条数据",
   "is_chatting": "正在聊天中...请等待结束",
   "items": "条",
+  "llm_tokens": "LLM tokens",
   "module_runtime_and": "工作流总运行时间",
   "multiple_AI_conversations": "多组 AI 对话",
   "new_input_guide_lexicon": "新词库",
@@ -3,7 +3,7 @@
   "Click_to_expand": "点击查看详情",
   "Download": "下载",
   "Export": "导出",
-  "FAQ.ai_point_a": "每次调用AI模型时,都会消耗一定的AI积分。具体的计算标准可参考上方的“AI 积分计算标准”。\nToken计算采用GPT3.5相同公式,1Token≈0.7中文字符≈0.9英文单词,连续出现的字符可能被认为是1个Tokens。",
+  "FAQ.ai_point_a": "每次调用AI模型时,都会消耗一定的AI积分。具体的计算标准可参考上方的“AI 积分计算标准”。系统会优先采用模型厂商返回的实际 usage,若为空,则采用GPT3.5的计算方式进行估算,1Token≈0.7中文字符≈0.9英文单词,连续出现的字符可能被认为是1个Tokens。",
   "FAQ.ai_point_expire_a": "会过期。当前套餐过期后,AI积分将会清空,并更新为新套餐的AI积分。年度套餐的AI积分时长为1年,而不是每个月。",
   "FAQ.ai_point_expire_q": "AI积分会过期么?",
   "FAQ.ai_point_q": "什么是AI积分?",
@@ -489,18 +489,15 @@
   "core.chat.response.module historyPreview": "记录预览(仅展示部分内容)",
   "core.chat.response.module http result": "响应体",
   "core.chat.response.module if else Result": "判断器结果",
-  "core.chat.response.module input tokens": "输入 Tokens",
   "core.chat.response.module limit": "单次搜索上限",
   "core.chat.response.module maxToken": "最大响应 tokens",
   "core.chat.response.module model": "模型",
   "core.chat.response.module name": "模型名",
-  "core.chat.response.module output tokens": "输出 Tokens",
   "core.chat.response.module query": "问题/检索词",
   "core.chat.response.module quoteList": "引用内容",
   "core.chat.response.module similarity": "相似度",
   "core.chat.response.module temperature": "温度",
   "core.chat.response.module time": "运行时长",
-  "core.chat.response.module tokens": "AI Tokens总量",
   "core.chat.response.plugin output": "插件输出值",
   "core.chat.response.search using reRank": "结果重排",
   "core.chat.response.text output": "文本输出",
@@ -43,6 +43,7 @@
   "insert_input_guide,_some_data_already_exists": "偵測到重複資料,已自動過濾,共插入 {{len}} 筆資料",
   "is_chatting": "對話進行中...請稍候",
   "items": "筆",
+  "llm_tokens": "LLM tokens",
   "module_runtime_and": "模組執行總時間",
   "moveCancel": "上滑取消",
   "multiple_AI_conversations": "多組 AI 對話",
@@ -3,7 +3,7 @@
   "Click_to_expand": "點選檢視詳細資訊",
   "Download": "下載",
   "Export": "匯出",
-  "FAQ.ai_point_a": "每次呼叫 AI 模型時,都會消耗一定數量的 AI 點數。詳細的計算標準請參考上方的「AI 點數計算標準」。\nToken 計算採用與 GPT3.5 相同的公式,1 Token ≈ 0.7 個中文字 ≈ 0.9 個英文單字,連續出現的字元可能會被視為 1 個 Token。",
+  "FAQ.ai_point_a": "每次調用AI模型時,都會消耗一定的AI積分。\n具體的計算標準可參考上方的“AI 積分計算標準”。\n系統會優先採用模型廠商返回的實際 usage,若為空,則採用GPT3.5的計算方式進行估算,1Token≈0.7中文字符≈0.9英文單詞,連續出現的字符可能被認為是1個Tokens。",
   "FAQ.ai_point_expire_a": "會過期。目前方案過期後,AI 點數將會清空並更新為新方案的 AI 點數。年度方案的 AI 點數有效期為一年,而不是每個月重設。",
   "FAQ.ai_point_expire_q": "AI 點數會過期嗎?",
   "FAQ.ai_point_q": "什麼是 AI 點數?",
@@ -489,18 +489,15 @@
   "core.chat.response.module historyPreview": "記錄預覽(僅顯示部分內容)",
   "core.chat.response.module http result": "回應內容",
   "core.chat.response.module if else Result": "條件判斷結果",
-  "core.chat.response.module input tokens": "輸入 tokens",
   "core.chat.response.module limit": "單次搜尋上限",
   "core.chat.response.module maxToken": "最大回應 Token 數",
   "core.chat.response.module model": "模型",
   "core.chat.response.module name": "模型名稱",
-  "core.chat.response.module output tokens": "輸出 tokens",
   "core.chat.response.module query": "問題/搜尋詞",
   "core.chat.response.module quoteList": "引用內容",
   "core.chat.response.module similarity": "相似度",
   "core.chat.response.module temperature": "溫度",
   "core.chat.response.module time": "執行時長",
-  "core.chat.response.module tokens": "總 Token 數",
   "core.chat.response.plugin output": "外掛程式輸出值",
   "core.chat.response.search using reRank": "結果重新排名",
   "core.chat.response.text output": "文字輸出",