diff --git a/docSite/content/zh-cn/docs/development/upgrading/497.md b/docSite/content/zh-cn/docs/development/upgrading/497.md index 6b966525b..b0c22695c 100644 --- a/docSite/content/zh-cn/docs/development/upgrading/497.md +++ b/docSite/content/zh-cn/docs/development/upgrading/497.md @@ -24,6 +24,7 @@ weight: 793 2. 调整 PG vector 查询语句,强制使用向量索引。 3. 对话时间统计,准确返回工作流整体运行时间。 4. 从 ai_proxy 获取音频解析时长。 +5. AI 模型 Token 值均优先采用 API usage,确保 tokens 值准确,若为空,则再采用 GPT3.5 的估算方式。 ## 🐛 修复 diff --git a/packages/global/core/ai/constants.ts b/packages/global/core/ai/constants.ts index 538fc1098..358a66118 100644 --- a/packages/global/core/ai/constants.ts +++ b/packages/global/core/ai/constants.ts @@ -1,4 +1,13 @@ import { i18nT } from '../../../web/i18n/utils'; +import type { CompletionUsage } from './type'; + +export const getLLMDefaultUsage = (): CompletionUsage => { + return { + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0 + }; +}; export enum ChatCompletionRequestMessageRoleEnum { 'System' = 'system', diff --git a/packages/global/core/ai/type.d.ts b/packages/global/core/ai/type.d.ts index c7d1fa740..ce77cc207 100644 --- a/packages/global/core/ai/type.d.ts +++ b/packages/global/core/ai/type.d.ts @@ -10,6 +10,7 @@ import type { } from 'openai/resources'; import { ChatMessageTypeEnum } from './constants'; import { WorkflowInteractiveResponseType } from '../workflow/template/system/interactive/type'; +import { Stream } from 'openai/streaming'; export * from 'openai/resources'; // Extension of ChatCompletionMessageParam, Add file url type @@ -84,6 +85,7 @@ export type CompletionFinishReason = export default openai; export * from 'openai'; +export type { Stream }; // Other export type PromptTemplateItem = { diff --git a/packages/global/core/chat/utils.ts b/packages/global/core/chat/utils.ts index 5423d39ff..9333ec278 100644 --- a/packages/global/core/chat/utils.ts +++ b/packages/global/core/chat/utils.ts @@ -185,7 +185,6 @@ export const mergeChatResponseData = ( runningTime: +((lastResponse.runningTime || 0) + (curr.runningTime || 0)).toFixed(2), totalPoints: (lastResponse.totalPoints || 0) + (curr.totalPoints || 0), childTotalPoints: (lastResponse.childTotalPoints || 0) + (curr.childTotalPoints || 0), - toolCallTokens: (lastResponse.toolCallTokens || 0) + (curr.toolCallTokens || 0), toolDetail: [...(lastResponse.toolDetail || []), ...(curr.toolDetail || [])], loopDetail: [...(lastResponse.loopDetail || []), ...(curr.loopDetail || [])], pluginDetail: [...(lastResponse.pluginDetail || []), ...(curr.pluginDetail || [])] diff --git a/packages/global/core/workflow/runtime/type.d.ts b/packages/global/core/workflow/runtime/type.d.ts index 6631cd297..343bb4075 100644 --- a/packages/global/core/workflow/runtime/type.d.ts +++ b/packages/global/core/workflow/runtime/type.d.ts @@ -186,7 +186,6 @@ export type DispatchNodeResponseType = { ifElseResult?: string; // tool - toolCallTokens?: number; toolCallInputTokens?: number; toolCallOutputTokens?: number; toolDetail?: ChatHistoryItemResType[]; diff --git a/packages/service/core/ai/config.ts b/packages/service/core/ai/config.ts index e1af8b173..f4f93ea98 100644 --- a/packages/service/core/ai/config.ts +++ b/packages/service/core/ai/config.ts @@ -1,5 +1,5 @@ import OpenAI from '@fastgpt/global/core/ai'; -import { +import type { ChatCompletionCreateParamsNonStreaming, ChatCompletionCreateParamsStreaming, StreamChatType, diff --git a/packages/service/core/ai/functions/createQuestionGuide.ts b/packages/service/core/ai/functions/createQuestionGuide.ts index 
b096b6f32..d8eca0def 100644 --- a/packages/service/core/ai/functions/createQuestionGuide.ts +++ b/packages/service/core/ai/functions/createQuestionGuide.ts @@ -2,7 +2,7 @@ import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d' import { createChatCompletion } from '../config'; import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index'; import { loadRequestMessages } from '../../chat/utils'; -import { llmCompletionsBodyFormat } from '../utils'; +import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils'; import { QuestionGuidePrompt, QuestionGuideFooterPrompt @@ -35,7 +35,7 @@ export async function createQuestionGuide({ useVision: false }); - const { response: data } = await createChatCompletion({ + const { response } = await createChatCompletion({ body: llmCompletionsBodyFormat( { model, @@ -47,21 +47,20 @@ export async function createQuestionGuide({ model ) }); - - const answer = data.choices?.[0]?.message?.content || ''; + const { text: answer, usage } = await llmResponseToAnswerText(response); const start = answer.indexOf('['); const end = answer.lastIndexOf(']'); - const inputTokens = await countGptMessagesTokens(requestMessages); - const outputTokens = await countPromptTokens(answer); + const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages)); + const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer)); if (start === -1 || end === -1) { addLog.warn('Create question guide error', { answer }); return { result: [], - inputTokens: 0, - outputTokens: 0 + inputTokens, + outputTokens }; } @@ -81,8 +80,8 @@ export async function createQuestionGuide({ return { result: [], - inputTokens: 0, - outputTokens: 0 + inputTokens, + outputTokens }; } } diff --git a/packages/service/core/ai/functions/queryExtension.ts b/packages/service/core/ai/functions/queryExtension.ts index c4b85ffcd..c94a8acb4 100644 --- a/packages/service/core/ai/functions/queryExtension.ts +++ b/packages/service/core/ai/functions/queryExtension.ts @@ -4,7 +4,7 @@ import { ChatItemType } from '@fastgpt/global/core/chat/type'; import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index'; import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt'; import { getLLMModel } from '../model'; -import { llmCompletionsBodyFormat } from '../utils'; +import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils'; import { addLog } from '../../../common/system/log'; import { filterGPTMessageByMaxContext } from '../../chat/utils'; import json5 from 'json5'; @@ -167,7 +167,7 @@ assistant: ${chatBg} } ] as any; - const { response: result } = await createChatCompletion({ + const { response } = await createChatCompletion({ body: llmCompletionsBodyFormat( { stream: false, @@ -178,15 +178,17 @@ assistant: ${chatBg} modelData ) }); + const { text: answer, usage } = await llmResponseToAnswerText(response); + const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(messages)); + const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer)); - let answer = result.choices?.[0]?.message?.content || ''; if (!answer) { return { rawQuery: query, extensionQueries: [], model, - inputTokens: 0, - outputTokens: 0 + inputTokens: inputTokens, + outputTokens: outputTokens }; } @@ -200,8 +202,8 @@ assistant: ${chatBg} rawQuery: query, extensionQueries: [], model, - inputTokens: 0, - outputTokens: 0 + inputTokens: inputTokens, + 
outputTokens: outputTokens }; } @@ -218,8 +220,8 @@ assistant: ${chatBg} rawQuery: query, extensionQueries: (Array.isArray(queries) ? queries : []).slice(0, 5), model, - inputTokens: await countGptMessagesTokens(messages), - outputTokens: await countPromptTokens(answer) + inputTokens, + outputTokens }; } catch (error) { addLog.warn('Query extension failed, not a valid JSON', { @@ -229,8 +231,8 @@ assistant: ${chatBg} rawQuery: query, extensionQueries: [], model, - inputTokens: 0, - outputTokens: 0 + inputTokens, + outputTokens }; } }; diff --git a/packages/service/core/ai/utils.ts b/packages/service/core/ai/utils.ts index 3cae8b086..161b8e21c 100644 --- a/packages/service/core/ai/utils.ts +++ b/packages/service/core/ai/utils.ts @@ -3,9 +3,12 @@ import { ChatCompletionCreateParamsNonStreaming, ChatCompletionCreateParamsStreaming, CompletionFinishReason, - StreamChatType + StreamChatType, + UnStreamChatType, + CompletionUsage } from '@fastgpt/global/core/ai/type'; import { getLLMModel } from './model'; +import { getLLMDefaultUsage } from '@fastgpt/global/core/ai/constants'; /* Count response max token @@ -97,13 +100,42 @@ export const llmCompletionsBodyFormat = ( return requestBody as unknown as InferCompletionsBody; }; -export const llmStreamResponseToAnswerText = async (response: StreamChatType) => { +export const llmStreamResponseToAnswerText = async ( + response: StreamChatType +): Promise<{ + text: string; + usage?: CompletionUsage; +}> => { let answer = ''; + let usage = getLLMDefaultUsage(); for await (const part of response) { + usage = part.usage || usage; + const content = part.choices?.[0]?.delta?.content || ''; answer += content; } - return parseReasoningContent(answer)[1]; + return { + text: parseReasoningContent(answer)[1], + usage + }; +}; +export const llmUnStreamResponseToAnswerText = async ( + response: UnStreamChatType +): Promise<{ + text: string; + usage?: CompletionUsage; +}> => { + const answer = response.choices?.[0]?.message?.content || ''; + return { + text: answer, + usage: response.usage + }; +}; +export const llmResponseToAnswerText = async (response: StreamChatType | UnStreamChatType) => { + if ('iterator' in response) { + return llmStreamResponseToAnswerText(response); + } + return llmUnStreamResponseToAnswerText(response); }; // Parse tags to think and answer - unstream response @@ -140,7 +172,7 @@ export const parseReasoningStreamContent = () => { part: { choices: { delta: { - content?: string; + content?: string | null; reasoning_content?: string; }; finish_reason?: CompletionFinishReason; diff --git a/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts b/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts index e0b608e83..ec54bf5e9 100644 --- a/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts +++ b/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts @@ -19,7 +19,7 @@ import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/ty import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt'; import { getHandleId } from '@fastgpt/global/core/workflow/utils'; import { loadRequestMessages } from '../../../chat/utils'; -import { llmCompletionsBodyFormat } from '../../../ai/utils'; +import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../../../ai/utils'; import { addLog } from '../../../../common/system/log'; import { ModelTypeEnum } from '../../../../../global/core/ai/model'; import { replaceVariable } from '@fastgpt/global/common/string/tools'; @@ -129,7 
+129,7 @@ const completions = async ({ useVision: false }); - const { response: data } = await createChatCompletion({ + const { response } = await createChatCompletion({ body: llmCompletionsBodyFormat( { model: cqModel.model, @@ -141,7 +141,7 @@ const completions = async ({ ), userKey: externalProvider.openaiAccount }); - const answer = data.choices?.[0].message?.content || ''; + const { text: answer, usage } = await llmResponseToAnswerText(response); // console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2)); // console.log(answer, '----'); @@ -156,8 +156,8 @@ const completions = async ({ } return { - inputTokens: await countGptMessagesTokens(requestMessages), - outputTokens: await countPromptTokens(answer), + inputTokens: usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages)), + outputTokens: usage?.completion_tokens || (await countPromptTokens(answer)), arg: { type: id } }; }; diff --git a/packages/service/core/workflow/dispatch/agent/extract.ts b/packages/service/core/workflow/dispatch/agent/extract.ts index e3886f7e2..2d4b682a4 100644 --- a/packages/service/core/workflow/dispatch/agent/extract.ts +++ b/packages/service/core/workflow/dispatch/agent/extract.ts @@ -23,14 +23,14 @@ import { getLLMModel } from '../../../ai/model'; import { formatModelChars2Points } from '../../../../support/wallet/usage/utils'; import json5 from 'json5'; import { - ChatCompletionCreateParams, ChatCompletionMessageParam, - ChatCompletionTool + ChatCompletionTool, + UnStreamChatType } from '@fastgpt/global/core/ai/type'; import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type'; import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt'; -import { llmCompletionsBodyFormat } from '../../../ai/utils'; +import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../../../ai/utils'; import { ModelTypeEnum } from '../../../../../global/core/ai/model'; import { getExtractJsonPrompt, @@ -76,13 +76,6 @@ export async function dispatchContentExtract(props: Props): Promise { extractModel }); } - if (extractModel.functionCall) { - return functionCall({ - ...props, - histories: chatHistories, - extractModel - }); - } return completions({ ...props, histories: chatHistories, @@ -233,9 +226,10 @@ const toolChoice = async (props: ActionProps) => { } ]; - const { response } = await createChatCompletion({ + const { response } = (await createChatCompletion({ body: llmCompletionsBodyFormat( { + stream: false, model: extractModel.model, temperature: 0.01, messages: filterMessages, @@ -245,7 +239,7 @@ const toolChoice = async (props: ActionProps) => { extractModel ), userKey: externalProvider.openaiAccount - }); + })) as { response: UnStreamChatType }; const arg: Record = (() => { try { @@ -267,8 +261,9 @@ const toolChoice = async (props: ActionProps) => { } ]; - const inputTokens = await countGptMessagesTokens(filterMessages, tools); - const outputTokens = await countGptMessagesTokens(AIMessages); + const usage = response.usage; + const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(filterMessages, tools)); + const outputTokens = usage?.completion_tokens || (await countGptMessagesTokens(AIMessages)); return { inputTokens, outputTokens, @@ -276,59 +271,6 @@ const toolChoice = async (props: ActionProps) => { }; }; -const functionCall = async (props: ActionProps) => { - const { externalProvider, extractModel } = props; - - const 
{ agentFunction, filterMessages } = await getFunctionCallSchema(props); - const functions: ChatCompletionCreateParams.Function[] = [agentFunction]; - - const { response } = await createChatCompletion({ - body: llmCompletionsBodyFormat( - { - model: extractModel.model, - temperature: 0.01, - messages: filterMessages, - function_call: { - name: agentFunName - }, - functions - }, - extractModel - ), - userKey: externalProvider.openaiAccount - }); - - try { - const arg = JSON.parse(response?.choices?.[0]?.message?.function_call?.arguments || ''); - - const AIMessages: ChatCompletionMessageParam[] = [ - { - role: ChatCompletionRequestMessageRoleEnum.Assistant, - function_call: response.choices?.[0]?.message?.function_call - } - ]; - - const inputTokens = await countGptMessagesTokens(filterMessages, undefined, functions); - const outputTokens = await countGptMessagesTokens(AIMessages); - - return { - arg, - inputTokens, - outputTokens - }; - } catch (error) { - console.log(response.choices?.[0]?.message); - - console.log('Your model may not support toll_call', error); - - return { - arg: {}, - inputTokens: 0, - outputTokens: 0 - }; - } -}; - const completions = async ({ extractModel, externalProvider, @@ -373,7 +315,7 @@ Human: ${content}` useVision: false }); - const { response: data } = await createChatCompletion({ + const { response } = await createChatCompletion({ body: llmCompletionsBodyFormat( { model: extractModel.model, @@ -385,7 +327,9 @@ Human: ${content}` ), userKey: externalProvider.openaiAccount }); - const answer = data.choices?.[0].message?.content || ''; + const { text: answer, usage } = await llmResponseToAnswerText(response); + const inputTokens = usage?.prompt_tokens || (await countMessagesTokens(messages)); + const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer)); // parse response const jsonStr = sliceJsonStr(answer); @@ -393,8 +337,8 @@ Human: ${content}` if (!jsonStr) { return { rawResponse: answer, - inputTokens: await countMessagesTokens(messages), - outputTokens: await countPromptTokens(answer), + inputTokens, + outputTokens, arg: {} }; } @@ -402,8 +346,8 @@ Human: ${content}` try { return { rawResponse: answer, - inputTokens: await countMessagesTokens(messages), - outputTokens: await countPromptTokens(answer), + inputTokens, + outputTokens, arg: json5.parse(jsonStr) as Record }; } catch (error) { @@ -411,8 +355,8 @@ Human: ${content}` console.log(error); return { rawResponse: answer, - inputTokens: await countMessagesTokens(messages), - outputTokens: await countPromptTokens(answer), + inputTokens, + outputTokens, arg: {} }; } diff --git a/packages/service/core/workflow/dispatch/agent/runTool/functionCall.ts b/packages/service/core/workflow/dispatch/agent/runTool/functionCall.ts index 1ae995a13..daf074efb 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/functionCall.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/functionCall.ts @@ -13,7 +13,10 @@ import { NextApiResponse } from 'next'; import { responseWriteController } from '../../../../../common/response'; import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants'; import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils'; -import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; +import { + ChatCompletionRequestMessageRoleEnum, + getLLMDefaultUsage +} from '@fastgpt/global/core/ai/constants'; import { dispatchWorkFlow } from '../../index'; import { 
DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d'; import json5 from 'json5'; @@ -244,17 +247,34 @@ export const runToolWithFunctionCall = async ( } }); - const { answer, functionCalls } = await (async () => { - if (res && isStreamResponse) { - return streamResponse({ + let { answer, functionCalls, inputTokens, outputTokens } = await (async () => { + if (isStreamResponse) { + if (!res || res.closed) { + return { + answer: '', + functionCalls: [], + inputTokens: 0, + outputTokens: 0 + }; + } + const result = await streamResponse({ res, toolNodes, stream: aiResponse, workflowStreamResponse }); + + return { + answer: result.answer, + functionCalls: result.functionCalls, + inputTokens: result.usage.prompt_tokens, + outputTokens: result.usage.completion_tokens + }; } else { const result = aiResponse as ChatCompletion; const function_call = result.choices?.[0]?.message?.function_call; + const usage = result.usage; + const toolNode = toolNodes.find((node) => node.nodeId === function_call?.name); const toolCalls = function_call @@ -270,7 +290,9 @@ export const runToolWithFunctionCall = async ( return { answer: result.choices?.[0]?.message?.content || '', - functionCalls: toolCalls + functionCalls: toolCalls, + inputTokens: usage?.prompt_tokens, + outputTokens: usage?.completion_tokens }; } })(); @@ -338,7 +360,7 @@ export const runToolWithFunctionCall = async ( : flatToolsResponseData; const functionCall = functionCalls[0]; - if (functionCall && !res?.closed) { + if (functionCall) { // Run the tool, combine its results, and perform another round of AI calls const assistantToolMsgParams: ChatCompletionAssistantMessageParam = { role: ChatCompletionRequestMessageRoleEnum.Assistant, @@ -356,8 +378,9 @@ export const runToolWithFunctionCall = async ( ] as ChatCompletionMessageParam[]; // Only toolCall tokens are counted here, Tool response tokens count towards the next reply // const tokens = await countGptMessagesTokens(concatToolMessages, undefined, functions); - const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions); - const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]); + inputTokens = + inputTokens || (await countGptMessagesTokens(requestMessages, undefined, functions)); + outputTokens = outputTokens || (await countGptMessagesTokens([assistantToolMsgParams])); /* ... 
user @@ -459,8 +482,9 @@ export const runToolWithFunctionCall = async ( content: answer }; const completeMessages = filterMessages.concat(gptAssistantResponse); - const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions); - const outputTokens = await countGptMessagesTokens([gptAssistantResponse]); + inputTokens = + inputTokens || (await countGptMessagesTokens(requestMessages, undefined, functions)); + outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse])); // console.log(tokens, 'response token'); // concat tool assistant @@ -500,8 +524,10 @@ async function streamResponse({ let textAnswer = ''; let functionCalls: ChatCompletionMessageFunctionCall[] = []; let functionId = getNanoid(); + let usage = getLLMDefaultUsage(); for await (const part of stream) { + usage = part.usage || usage; if (res.closed) { stream.controller?.abort(); break; @@ -522,7 +548,7 @@ async function streamResponse({ }); } else if (responseChoice.function_call) { const functionCall: { - arguments: string; + arguments?: string; name?: string; } = responseChoice.function_call; @@ -532,11 +558,9 @@ async function streamResponse({ const toolNode = toolNodes.find((item) => item.nodeId === functionCall?.name); if (toolNode) { - if (functionCall?.arguments === undefined) { - functionCall.arguments = ''; - } functionCalls.push({ ...functionCall, + arguments: functionCall.arguments || '', id: functionId, name: functionCall.name, toolName: toolNode.name, @@ -552,7 +576,7 @@ async function streamResponse({ toolName: toolNode.name, toolAvatar: toolNode.avatar, functionName: functionCall.name, - params: functionCall.arguments, + params: functionCall.arguments || '', response: '' } } @@ -585,5 +609,5 @@ async function streamResponse({ } } - return { answer: textAnswer, functionCalls }; + return { answer: textAnswer, functionCalls, usage }; } diff --git a/packages/service/core/workflow/dispatch/agent/runTool/index.ts b/packages/service/core/workflow/dispatch/agent/runTool/index.ts index 7850e2eb3..6e7d09f07 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/index.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/index.ts @@ -171,7 +171,6 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise< const { toolWorkflowInteractiveResponse, dispatchFlowResponse, // tool flow response - toolNodeTokens, toolNodeInputTokens, toolNodeOutputTokens, completeMessages = [], // The actual message sent to AI(just save text) @@ -271,7 +270,6 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise< [DispatchNodeResponseKeyEnum.nodeResponse]: { // 展示的积分消耗 totalPoints: totalPointsUsage, - toolCallTokens: toolNodeTokens, toolCallInputTokens: toolNodeInputTokens, toolCallOutputTokens: toolNodeOutputTokens, childTotalPoints: flatUsages.reduce((sum, item) => sum + item.totalPoints, 0), diff --git a/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts b/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts index 808006a7f..215f79ed2 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts @@ -9,7 +9,10 @@ import { NextApiResponse } from 'next'; import { responseWriteController } from '../../../../../common/response'; import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants'; import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils'; -import 
{ ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; +import { + ChatCompletionRequestMessageRoleEnum, + getLLMDefaultUsage +} from '@fastgpt/global/core/ai/constants'; import { dispatchWorkFlow } from '../../index'; import { DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d'; import json5 from 'json5'; @@ -256,9 +259,18 @@ export const runToolWithPromptCall = async ( } }); - const { answer, reasoning, finish_reason } = await (async () => { - if (res && isStreamResponse) { - const { answer, reasoning, finish_reason } = await streamResponse({ + let { answer, reasoning, finish_reason, inputTokens, outputTokens } = await (async () => { + if (isStreamResponse) { + if (!res || res.closed) { + return { + answer: '', + reasoning: '', + finish_reason: 'close' as const, + inputTokens: 0, + outputTokens: 0 + }; + } + const { answer, reasoning, finish_reason, usage } = await streamResponse({ res, toolNodes, stream: aiResponse, @@ -266,18 +278,28 @@ export const runToolWithPromptCall = async ( aiChatReasoning }); - return { answer, reasoning, finish_reason }; + return { + answer, + reasoning, + finish_reason, + inputTokens: usage.prompt_tokens, + outputTokens: usage.completion_tokens + }; } else { const finish_reason = aiResponse.choices?.[0]?.finish_reason as CompletionFinishReason; const content = aiResponse.choices?.[0]?.message?.content || ''; + // @ts-ignore const reasoningContent: string = aiResponse.choices?.[0]?.message?.reasoning_content || ''; + const usage = aiResponse.usage; // API already parse reasoning content if (reasoningContent || !aiChatReasoning) { return { answer: content, reasoning: reasoningContent, - finish_reason + finish_reason, + inputTokens: usage?.prompt_tokens, + outputTokens: usage?.completion_tokens }; } @@ -285,7 +307,9 @@ export const runToolWithPromptCall = async ( return { answer, reasoning: think, - finish_reason + finish_reason, + inputTokens: usage?.prompt_tokens, + outputTokens: usage?.completion_tokens }; } })(); @@ -336,8 +360,8 @@ export const runToolWithPromptCall = async ( reasoning_text: undefined }); - const inputTokens = await countGptMessagesTokens(requestMessages); - const outputTokens = await countGptMessagesTokens([gptAssistantResponse]); + inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages)); + outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse])); // concat tool assistant const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType; @@ -423,8 +447,8 @@ export const runToolWithPromptCall = async ( }; // Only toolCall tokens are counted here, Tool response tokens count towards the next reply - const inputTokens = await countGptMessagesTokens(requestMessages); - const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]); + inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages)); + outputTokens = outputTokens || (await countGptMessagesTokens([assistantToolMsgParams])); /* ... 
@@ -559,9 +583,12 @@ async function streamResponse({ let answer = ''; let reasoning = ''; let finish_reason: CompletionFinishReason = null; + let usage = getLLMDefaultUsage(); + const { parsePart, getStartTagBuffer } = parseReasoningStreamContent(); for await (const part of stream) { + usage = part.usage || usage; if (res.closed) { stream.controller?.abort(); finish_reason = 'close'; @@ -629,7 +656,7 @@ async function streamResponse({ } } - return { answer, reasoning, finish_reason }; + return { answer, reasoning, finish_reason, usage }; } const parseAnswer = ( diff --git a/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts b/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts index 1b7bfe5b5..2b61bd371 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts @@ -14,7 +14,10 @@ import { NextApiResponse } from 'next'; import { responseWriteController } from '../../../../../common/response'; import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants'; import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils'; -import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; +import { + ChatCompletionRequestMessageRoleEnum, + getLLMDefaultUsage +} from '@fastgpt/global/core/ai/constants'; import { dispatchWorkFlow } from '../../index'; import { DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d'; import json5 from 'json5'; @@ -301,19 +304,38 @@ export const runToolWithToolChoice = async ( } }); - const { answer, toolCalls, finish_reason } = await (async () => { - if (res && isStreamResponse) { - return streamResponse({ + let { answer, toolCalls, finish_reason, inputTokens, outputTokens } = await (async () => { + if (isStreamResponse) { + if (!res || res.closed) { + return { + answer: '', + toolCalls: [], + finish_reason: 'close' as const, + inputTokens: 0, + outputTokens: 0 + }; + } + + const result = await streamResponse({ res, workflowStreamResponse, toolNodes, stream: aiResponse }); + + return { + answer: result.answer, + toolCalls: result.toolCalls, + finish_reason: result.finish_reason, + inputTokens: result.usage.prompt_tokens, + outputTokens: result.usage.completion_tokens + }; } else { const result = aiResponse as ChatCompletion; const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason; const calls = result.choices?.[0]?.message?.tool_calls || []; const answer = result.choices?.[0]?.message?.content || ''; + const usage = result.usage; // 加上name和avatar const toolCalls = calls.map((tool) => { @@ -353,7 +375,9 @@ export const runToolWithToolChoice = async ( return { answer, toolCalls: toolCalls, - finish_reason + finish_reason, + inputTokens: usage?.prompt_tokens, + outputTokens: usage?.completion_tokens }; } })(); @@ -447,7 +471,7 @@ export const runToolWithToolChoice = async ( ? 
response.dispatchFlowResponse.concat(flatToolsResponseData) : flatToolsResponseData; - if (toolCalls.length > 0 && !res?.closed) { + if (toolCalls.length > 0) { // Run the tool, combine its results, and perform another round of AI calls const assistantToolMsgParams: ChatCompletionAssistantMessageParam[] = [ ...(answer @@ -475,8 +499,8 @@ export const runToolWithToolChoice = async ( ] as ChatCompletionMessageParam[]; // Only toolCall tokens are counted here, Tool response tokens count towards the next reply - const inputTokens = await countGptMessagesTokens(requestMessages, tools); - const outputTokens = await countGptMessagesTokens(assistantToolMsgParams); + inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools)); + outputTokens = outputTokens || (await countGptMessagesTokens(assistantToolMsgParams)); /* ... @@ -580,8 +604,8 @@ export const runToolWithToolChoice = async ( content: answer }; const completeMessages = filterMessages.concat(gptAssistantResponse); - const inputTokens = await countGptMessagesTokens(requestMessages, tools); - const outputTokens = await countGptMessagesTokens([gptAssistantResponse]); + inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools)); + outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse])); // concat tool assistant const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType; @@ -619,8 +643,10 @@ async function streamResponse({ let callingTool: { name: string; arguments: string } | null = null; let toolCalls: ChatCompletionMessageToolCall[] = []; let finishReason: CompletionFinishReason = null; + let usage = getLLMDefaultUsage(); for await (const part of stream) { + usage = part.usage || usage; if (res.closed) { stream.controller?.abort(); finishReason = 'close'; @@ -644,6 +670,7 @@ async function streamResponse({ }); } if (responseChoice?.tool_calls?.[0]) { + // @ts-ignore const toolCall: ChatCompletionMessageToolCall = responseChoice.tool_calls[0]; // In a stream response, only one tool is returned at a time. 
If have id, description is executing a tool if (toolCall.id || callingTool) { @@ -715,5 +742,5 @@ async function streamResponse({ } } - return { answer: textAnswer, toolCalls, finish_reason: finishReason }; + return { answer: textAnswer, toolCalls, finish_reason: finishReason, usage }; } diff --git a/packages/service/core/workflow/dispatch/agent/runTool/type.d.ts b/packages/service/core/workflow/dispatch/agent/runTool/type.d.ts index 52ec7c4bc..d55a0c98d 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/type.d.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/type.d.ts @@ -36,7 +36,6 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{ export type RunToolResponse = { dispatchFlowResponse: DispatchFlowResponse[]; - toolNodeTokens?: number; // deprecated toolNodeInputTokens: number; toolNodeOutputTokens: number; completeMessages?: ChatCompletionMessageParam[]; diff --git a/packages/service/core/workflow/dispatch/chat/oneapi.ts b/packages/service/core/workflow/dispatch/chat/oneapi.ts index a96356ced..d8d9561db 100644 --- a/packages/service/core/workflow/dispatch/chat/oneapi.ts +++ b/packages/service/core/workflow/dispatch/chat/oneapi.ts @@ -9,11 +9,15 @@ import { createChatCompletion } from '../../../ai/config'; import type { ChatCompletionMessageParam, CompletionFinishReason, + CompletionUsage, StreamChatType } from '@fastgpt/global/core/ai/type.d'; import { formatModelChars2Points } from '../../../../support/wallet/usage/utils'; import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d'; -import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; +import { + ChatCompletionRequestMessageRoleEnum, + getLLMDefaultUsage +} from '@fastgpt/global/core/ai/constants'; import type { ChatDispatchProps, DispatchNodeResultType @@ -199,17 +203,19 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise { + let { answerText, reasoningText, finish_reason, inputTokens, outputTokens } = await (async () => { if (isStreamResponse) { - if (!res) { + if (!res || res.closed) { return { answerText: '', reasoningText: '', - finish_reason: 'close' as const + finish_reason: 'close' as const, + inputTokens: 0, + outputTokens: 0 }; } // sse response - const { answer, reasoning, finish_reason } = await streamResponse({ + const { answer, reasoning, finish_reason, usage } = await streamResponse({ res, stream: response, aiChatReasoning, @@ -221,10 +227,13 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise { const content = response.choices?.[0]?.message?.content || ''; @@ -269,7 +278,9 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise - - - - - - + {activeModule?.tokens && ( + + )} + {(!!activeModule?.inputTokens || !!activeModule?.outputTokens) && ( + + )} + {(!!activeModule?.toolCallInputTokens || !!activeModule?.toolCallOutputTokens) && ( + + )}
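
Note on the recurring pattern in this patch: every call site now prefers the provider-reported usage (prompt_tokens / completion_tokens) and only falls back to local GPT-3.5-style token estimation when the API omits usage. The sketch below is not part of the diff; it restates that fallback with simplified stand-in types and estimators (the real code uses CompletionUsage from @fastgpt/global/core/ai/type and the countGptMessagesTokens / countPromptTokens tiktoken helpers).

// Minimal sketch, assuming simplified stand-ins for the real helpers.
type CompletionUsage = {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
};

type ChatMessage = { role: 'system' | 'user' | 'assistant'; content: string };

// Rough stand-in estimators: the production code counts tokens with tiktoken
// (GPT-3.5 encoder); ~4 characters per token here is only an approximation.
const estimateMessagesTokens = async (messages: ChatMessage[]): Promise<number> =>
  messages.reduce((sum, m) => sum + Math.ceil(m.content.length / 4), 0);
const estimateTextTokens = async (text: string): Promise<number> =>
  Math.ceil(text.length / 4);

export const resolveTokenCounts = async ({
  usage,
  requestMessages,
  answer
}: {
  usage?: CompletionUsage;
  requestMessages: ChatMessage[];
  answer: string;
}): Promise<{ inputTokens: number; outputTokens: number }> => {
  // API usage wins when present and non-zero; `||` also covers the
  // getLLMDefaultUsage() case where every field is initialised to 0.
  const inputTokens =
    usage?.prompt_tokens || (await estimateMessagesTokens(requestMessages));
  const outputTokens =
    usage?.completion_tokens || (await estimateTextTokens(answer));

  return { inputTokens, outputTokens };
};

In the streaming paths the patch accumulates part.usage across chunks (starting from getLLMDefaultUsage()), so the same zero-valued fallback applies when a provider never emits usage in the stream.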