V4.8.17 feature (#3493)

* split tokens into input and output (#3477)

* split tokens into input and output

* query extension & tool call & question guide

* fix

* perf: input and output tokens

* perf: tool call if else

* perf: remove code

* fix: extract usage count

* fix: qa usage count

---------

Co-authored-by: heheer <heheer@sealos.io>
Archer committed 2024-12-30 10:13:25 +08:00 (committed by GitHub)
parent da2831b948
commit 50bf7f9a3b
46 changed files with 467 additions and 230 deletions

View File

@@ -28,6 +28,11 @@ curl --location --request POST 'https://{{host}}/api/admin/initv4817' \
This will move user-bound OpenAI accounts into their team.
## Adjusted completions API return value
The /api/v1/chat/completions response has been adjusted: nodes that use a model (chat nodes, tool nodes, etc.) no longer return a `tokens` field; they now return `inputTokens` and `outputTokens` fields, giving the input and output token counts respectively.
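A minimal TypeScript sketch of consuming the new fields; only `inputTokens`/`outputTokens` (replacing `tokens`) come from this change, and the surrounding node shape is assumed for illustration:

```ts
// Illustrative response-node shape; fields other than
// inputTokens/outputTokens are assumptions for the example.
type ModelNodeResponse = {
  moduleName: string;
  model?: string;
  totalPoints?: number;
  inputTokens?: number; // new: input token count
  outputTokens?: number; // new: output token count
};

// Sum token usage over the model nodes returned in the response details.
const sumTokens = (nodes: ModelNodeResponse[]) =>
  nodes.reduce(
    (acc, n) => ({
      input: acc.input + (n.inputTokens ?? 0),
      output: acc.output + (n.outputTokens ?? 0)
    }),
    { input: 0, output: 0 }
  );
```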
## Full changelog
1. New - Simple-mode tool calls support array-type plugins.
@@ -36,10 +41,12 @@ curl --location --request POST 'https://{{host}}/api/admin/initv4817' \
4. New - Commercial edition supports configuring the template market from the admin console.
5. New - Commercial edition supports configuring custom workflow variables from the admin console, for integrating authentication with business systems.
6. New - The search test API supports query optimization.
7. Improved - Markdown size check: content over 200,000 characters is not rendered with the Markdown component, to avoid crashes.
8. Improved - Dataset search parameters: the sliders support an input mode for more precise control.
9. Improved - Display of available models.
10. Improved - Mongo queries: added virtual fields.
11. Fixed - The file response endpoint was missing the Content-Length header, so Alibaba vision models could not recognize images uploaded from a non-same-origin file.
12. Fixed - Strip hidden line breaks from both ends of strings in the condition node, to keep it from failing.
13. Fixed - In the variable update node, non-string data types were not converted automatically when the update value was entered manually.
7. New - Input tokens and output tokens are recorded and displayed separately in workflows; also fixed output tokens not being billed for some requests.
8. Improved - Markdown size check: content over 200,000 characters is not rendered with the Markdown component, to avoid crashes.
9. Improved - Dataset search parameters: the sliders support an input mode for more precise control.
10. Improved - UI for displaying available models.
11. Improved - Mongo queries: added virtual fields.
12. Fixed - The file response endpoint was missing the Content-Length header, so Alibaba vision models could not recognize images uploaded from a non-same-origin file.
13. Fixed - Strip hidden line breaks from both ends of strings in the condition node, to keep it from failing.
14. Fixed - In the variable update node, non-string data types were not converted automatically when the update value was entered manually.
15. Fixed - Doubao models could not make tool calls.

View File

@@ -1,6 +1,13 @@
import type { ModelProviderIdType } from './provider';
export type LLMModelItemType = {
type PriceType = {
charsPointsPrice?: number; // 1k chars=n points; 60s=n points;
// If inputPrice is set, the input-output charging scheme is adopted
inputPrice?: number; // 1k tokens=n points
outputPrice?: number; // 1k tokens=n points
};
export type LLMModelItemType = PriceType & {
provider: ModelProviderIdType;
model: string;
name: string;
@@ -10,8 +17,6 @@ export type LLMModelItemType = {
quoteMaxToken: number;
maxTemperature: number;
charsPointsPrice: number; // 1k chars=n points
censor?: boolean;
vision?: boolean;
@@ -33,13 +38,12 @@ export type LLMModelItemType = {
fieldMap?: Record<string, string>;
};
export type VectorModelItemType = {
export type VectorModelItemType = PriceType & {
provider: ModelProviderIdType;
model: string; // model name
name: string; // show name
avatar?: string;
defaultToken: number; // split text default token
charsPointsPrice: number; // 1k tokens=n points
maxToken: number; // model max token
weight: number; // training weight
hidden?: boolean; // Disallow creation
@@ -48,25 +52,22 @@ export type VectorModelItemType = {
queryConfig?: Record<string, any>; // Custom parameters for query
};
export type ReRankModelItemType = {
export type ReRankModelItemType = PriceType & {
model: string;
name: string;
charsPointsPrice: number;
requestUrl: string;
requestAuth: string;
};
export type AudioSpeechModelType = {
export type AudioSpeechModelType = PriceType & {
provider: ModelProviderIdType;
model: string;
name: string;
charsPointsPrice: number;
voices: { label: string; value: string; bufferId: string }[];
};
export type STTModelType = {
export type STTModelType = PriceType & {
provider: ModelProviderIdType;
model: string;
name: string;
charsPointsPrice: number; // 60s = n points
};
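For reference, a minimal sketch of model entries using the new `PriceType` fields; the model names and prices are illustrative, not part of this commit:

```ts
// Illustrative only: a model either sets charsPointsPrice (single-rate pricing)
// or inputPrice/outputPrice (input-output pricing).
const singleRateModel: Pick<LLMModelItemType, 'model' | 'name' | 'charsPointsPrice'> = {
  model: 'gpt-4o-mini',
  name: 'gpt-4o-mini',
  charsPointsPrice: 1 // 1k tokens = 1 point
};

const ioPricedModel: Pick<LLMModelItemType, 'model' | 'name' | 'inputPrice' | 'outputPrice'> = {
  model: 'gpt-4o',
  name: 'gpt-4o',
  inputPrice: 2.5, // 1k input tokens = 2.5 points
  outputPrice: 10 // 1k output tokens = 10 points
};
```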

View File

@@ -107,7 +107,9 @@ export type DispatchNodeResponseType = {
mergeSignId?: string;
// bill
tokens?: number;
tokens?: number; // deprecated
inputTokens?: number;
outputTokens?: number;
model?: string;
contextTotalLen?: number;
totalPoints?: number;
@@ -157,6 +159,8 @@ export type DispatchNodeResponseType = {
// tool
toolCallTokens?: number;
toolCallInputTokens?: number;
toolCallOutputTokens?: number;
toolDetail?: ChatHistoryItemResType[];
toolStop?: boolean;

View File

@@ -23,7 +23,8 @@ export type BillSchemaType = {
};
export type ChatNodeUsageType = {
tokens?: number;
inputTokens?: number;
outputTokens?: number;
totalPoints: number;
moduleName: string;
model?: string;

View File

@@ -2,9 +2,13 @@ import { CreateUsageProps } from './api';
import { UsageSourceEnum } from './constants';
export type UsageListItemCountType = {
tokens?: number;
inputTokens?: number;
outputTokens?: number;
charsLength?: number;
duration?: number;
// deprecated
tokens?: number;
};
export type UsageListItemType = UsageListItemCountType & {
moduleName: string;

View File

@@ -2,7 +2,7 @@
"author": "",
"version": "488",
"name": "飞书 webhook",
"avatar": "/appMarketTemplates/plugin-feishu/avatar.svg",
"avatar": "core/app/templates/plugin-feishu",
"intro": "向飞书机器人发起 webhook 请求。",
"courseUrl": "https://open.feishu.cn/document/client-docs/bot-v3/add-custom-bot#f62e72d5",
"showStatus": false,

View File

@@ -15,6 +15,9 @@ export const initFastGPTConfig = (config?: FastGPTConfigFileType) => {
global.subPlans = config.subPlans;
global.llmModels = config.llmModels;
global.llmModelPriceType = global.llmModels.some((item) => typeof item.inputPrice === 'number')
? 'IO'
: 'Tokens';
global.vectorModels = config.vectorModels;
global.audioSpeechModels = config.audioSpeechModels;
global.whisperModel = config.whisperModel;

View File

@@ -1,6 +1,6 @@
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d';
import { createChatCompletion } from '../config';
import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { loadRequestMessages } from '../../chat/utils';
import { llmCompletionsBodyFormat } from '../utils';
import {
@@ -20,7 +20,8 @@ export async function createQuestionGuide({
customPrompt?: string;
}): Promise<{
result: string[];
tokens: number;
inputTokens: number;
outputTokens: number;
}> {
const concatMessages: ChatCompletionMessageParam[] = [
...messages,
@@ -29,6 +30,10 @@ export async function createQuestionGuide({
content: `${customPrompt || PROMPT_QUESTION_GUIDE}\n${PROMPT_QUESTION_GUIDE_FOOTER}`
}
];
const requestMessages = await loadRequestMessages({
messages: concatMessages,
useVision: false
});
const { response: data } = await createChatCompletion({
body: llmCompletionsBodyFormat(
@@ -36,10 +41,7 @@ export async function createQuestionGuide({
model,
temperature: 0.1,
max_tokens: 200,
messages: await loadRequestMessages({
messages: concatMessages,
useVision: false
}),
messages: requestMessages,
stream: false
},
model
@@ -51,13 +53,15 @@ export async function createQuestionGuide({
const start = answer.indexOf('[');
const end = answer.lastIndexOf(']');
const tokens = await countGptMessagesTokens(concatMessages);
const inputTokens = await countGptMessagesTokens(requestMessages);
const outputTokens = await countPromptTokens(answer);
if (start === -1 || end === -1) {
addLog.warn('Create question guide error', { answer });
return {
result: [],
tokens: 0
inputTokens: 0,
outputTokens: 0
};
}
@@ -69,14 +73,16 @@ export async function createQuestionGuide({
try {
return {
result: json5.parse(jsonStr),
tokens
inputTokens,
outputTokens
};
} catch (error) {
console.log(error);
return {
result: [],
tokens: 0
inputTokens: 0,
outputTokens: 0
};
}
}

View File

@@ -1,7 +1,7 @@
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { createChatCompletion } from '../config';
import { ChatItemType } from '@fastgpt/global/core/chat/type';
import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
import { getLLMModel } from '../model';
import { llmCompletionsBodyFormat } from '../utils';
@@ -121,7 +121,8 @@ export const queryExtension = async ({
rawQuery: string;
extensionQueries: string[];
model: string;
tokens: number;
inputTokens: number;
outputTokens: number;
}> => {
const systemFewShot = chatBg
? `Q: 对话背景。
@@ -166,7 +167,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: [],
model,
tokens: 0
inputTokens: 0,
outputTokens: 0
};
}
@@ -181,7 +183,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: Array.isArray(queries) ? queries : [],
model,
tokens: await countGptMessagesTokens(messages)
inputTokens: await countGptMessagesTokens(messages),
outputTokens: await countPromptTokens(answer)
};
} catch (error) {
addLog.error(`Query extension error`, error);
@@ -189,7 +192,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: [],
model,
tokens: 0
inputTokens: 0,
outputTokens: 0
};
}
};

View File

@@ -4,6 +4,7 @@ export const getLLMModel = (model?: string) => {
global.llmModels[0]
);
};
export const getDatasetModel = (model?: string) => {
return (
global.llmModels

View File

@@ -1,5 +1,8 @@
import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
import { countMessagesTokens } from '../../../../common/string/tiktoken/index';
import {
countGptMessagesTokens,
countPromptTokens
} from '../../../../common/string/tiktoken/index';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatItemValueTypeEnum, ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { createChatCompletion } from '../../../ai/config';
@@ -49,7 +52,7 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
const chatHistories = getHistories(history, histories);
const { arg, tokens } = await completions({
const { arg, inputTokens, outputTokens } = await completions({
...props,
histories: chatHistories,
cqModel
@@ -59,7 +62,8 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
const { totalPoints, modelName } = formatModelChars2Points({
model: cqModel.model,
tokens,
inputTokens: inputTokens,
outputTokens: outputTokens,
modelType: ModelTypeEnum.llm
});
@@ -72,7 +76,8 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
query: userChatInput,
tokens,
inputTokens: inputTokens,
outputTokens: outputTokens,
cqList: agents,
cqResult: result.value,
contextTotalLen: chatHistories.length + 2
@@ -82,7 +87,8 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
moduleName: name,
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
tokens
inputTokens: inputTokens,
outputTokens: outputTokens
}
]
};
@@ -148,7 +154,8 @@ const completions = async ({
}
return {
tokens: await countMessagesTokens(messages),
inputTokens: await countGptMessagesTokens(requestMessages),
outputTokens: await countPromptTokens(answer),
arg: { type: id }
};
};

View File

@@ -3,7 +3,8 @@ import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../chat/
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import {
countMessagesTokens,
countGptMessagesTokens
countGptMessagesTokens,
countPromptTokens
} from '../../../../common/string/tiktoken/index';
import { ChatItemValueTypeEnum, ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { createChatCompletion } from '../../../ai/config';
@@ -59,7 +60,7 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
const extractModel = getLLMModel(model);
const chatHistories = getHistories(history, histories);
const { arg, tokens } = await (async () => {
const { arg, inputTokens, outputTokens } = await (async () => {
if (extractModel.toolChoice) {
return toolChoice({
...props,
@@ -114,7 +115,8 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
const { totalPoints, modelName } = formatModelChars2Points({
model: extractModel.model,
tokens,
inputTokens: inputTokens,
outputTokens: outputTokens,
modelType: ModelTypeEnum.llm
});
@@ -126,7 +128,8 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
query: content,
tokens,
inputTokens,
outputTokens,
extractDescription: description,
extractResult: arg,
contextTotalLen: chatHistories.length + 2
@@ -136,7 +139,8 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
moduleName: name,
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
tokens
inputTokens,
outputTokens
}
]
};
@@ -249,15 +253,18 @@ const toolChoice = async (props: ActionProps) => {
}
})();
const completeMessages: ChatCompletionMessageParam[] = [
...filterMessages,
const AIMessages: ChatCompletionMessageParam[] = [
{
role: ChatCompletionRequestMessageRoleEnum.Assistant,
tool_calls: response.choices?.[0]?.message?.tool_calls
}
];
const inputTokens = await countGptMessagesTokens(filterMessages, tools);
const outputTokens = await countGptMessagesTokens(AIMessages);
return {
tokens: await countGptMessagesTokens(completeMessages, tools),
inputTokens,
outputTokens,
arg
};
};
@@ -286,17 +293,21 @@ const functionCall = async (props: ActionProps) => {
try {
const arg = JSON.parse(response?.choices?.[0]?.message?.function_call?.arguments || '');
const completeMessages: ChatCompletionMessageParam[] = [
...filterMessages,
const AIMessages: ChatCompletionMessageParam[] = [
{
role: ChatCompletionRequestMessageRoleEnum.Assistant,
function_call: response.choices?.[0]?.message?.function_call
}
];
const inputTokens = await countGptMessagesTokens(filterMessages, undefined, functions);
const outputTokens = await countGptMessagesTokens(AIMessages);
return {
arg,
tokens: await countGptMessagesTokens(completeMessages, undefined, functions)
inputTokens,
outputTokens
};
} catch (error) {
console.log(response.choices?.[0]?.message);
@@ -305,7 +316,8 @@ const functionCall = async (props: ActionProps) => {
return {
arg: {},
tokens: 0
inputTokens: 0,
outputTokens: 0
};
}
};
@@ -370,7 +382,8 @@ Human: ${content}`
if (!jsonStr) {
return {
rawResponse: answer,
tokens: await countMessagesTokens(messages),
inputTokens: await countMessagesTokens(messages),
outputTokens: await countPromptTokens(answer),
arg: {}
};
}
@@ -378,7 +391,8 @@ Human: ${content}`
try {
return {
rawResponse: answer,
tokens: await countMessagesTokens(messages),
inputTokens: await countMessagesTokens(messages),
outputTokens: await countPromptTokens(answer),
arg: json5.parse(jsonStr) as Record<string, any>
};
} catch (error) {
@@ -386,7 +400,8 @@ Human: ${content}`
console.log(error);
return {
rawResponse: answer,
tokens: await countMessagesTokens(messages),
inputTokens: await countMessagesTokens(messages),
outputTokens: await countPromptTokens(answer),
arg: {}
};
}

View File

@@ -109,7 +109,8 @@ export const runToolWithFunctionCall = async (
return {
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
completeMessages: requestMessages,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes,
@@ -126,7 +127,8 @@ export const runToolWithFunctionCall = async (
},
{
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes
}
@@ -340,7 +342,9 @@ export const runToolWithFunctionCall = async (
assistantToolMsgParams
] as ChatCompletionMessageParam[];
// Only toolCall tokens are counted here, Tool response tokens count towards the next reply
const tokens = await countGptMessagesTokens(concatToolMessages, undefined, functions);
// const tokens = await countGptMessagesTokens(concatToolMessages, undefined, functions);
const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions);
const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]);
/*
...
user
@@ -375,7 +379,12 @@ export const runToolWithFunctionCall = async (
const runTimes =
(response?.runTimes || 0) +
flatToolsResponseData.reduce((sum, item) => sum + item.runTimes, 0);
const toolNodeTokens = response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens;
const toolNodeInputTokens = response?.toolNodeInputTokens
? response.toolNodeInputTokens + inputTokens
: inputTokens;
const toolNodeOutputTokens = response?.toolNodeOutputTokens
? response.toolNodeOutputTokens + outputTokens
: outputTokens;
// Check stop signal
const hasStopSignal = flatToolsResponseData.some(
@@ -408,7 +417,8 @@ export const runToolWithFunctionCall = async (
return {
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
completeMessages,
assistantResponses: toolNodeAssistants,
runTimes,
@@ -423,7 +433,8 @@ export const runToolWithFunctionCall = async (
},
{
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
assistantResponses: toolNodeAssistants,
runTimes
}
@@ -435,7 +446,8 @@ export const runToolWithFunctionCall = async (
content: answer
};
const completeMessages = filterMessages.concat(gptAssistantResponse);
const tokens = await countGptMessagesTokens(completeMessages, undefined, functions);
const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions);
const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
// console.log(tokens, 'response token');
// concat tool assistant
@@ -443,7 +455,12 @@ export const runToolWithFunctionCall = async (
return {
dispatchFlowResponse: response?.dispatchFlowResponse || [],
toolNodeTokens: response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens,
toolNodeInputTokens: response?.toolNodeInputTokens
? response.toolNodeInputTokens + inputTokens
: inputTokens,
toolNodeOutputTokens: response?.toolNodeOutputTokens
? response.toolNodeOutputTokens + outputTokens
: outputTokens,
completeMessages,
assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
runTimes: (response?.runTimes || 0) + 1

View File

@@ -165,6 +165,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
toolWorkflowInteractiveResponse,
dispatchFlowResponse, // tool flow response
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
completeMessages = [], // The actual message sent to AI(just save text)
assistantResponses = [], // FastGPT system store assistant.value response
runTimes
@@ -225,7 +227,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
const { totalPoints, modelName } = formatModelChars2Points({
model,
tokens: toolNodeTokens,
inputTokens: toolNodeInputTokens,
outputTokens: toolNodeOutputTokens,
modelType: ModelTypeEnum.llm
});
const toolAIUsage = externalProvider.openaiAccount?.key ? 0 : totalPoints;
@@ -255,6 +258,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
// Points consumption shown in the response
totalPoints: totalPointsUsage,
toolCallTokens: toolNodeTokens,
toolCallInputTokens: toolNodeInputTokens,
toolCallOutputTokens: toolNodeOutputTokens,
childTotalPoints: flatUsages.reduce((sum, item) => sum + item.totalPoints, 0),
model: modelName,
query: userChatInput,
@@ -270,9 +275,10 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
// Points consumed by the tool call itself
{
moduleName: name,
totalPoints: toolAIUsage,
model: modelName,
tokens: toolNodeTokens
totalPoints: toolAIUsage,
inputTokens: toolNodeInputTokens,
outputTokens: toolNodeOutputTokens
},
// Usage from the invoked tools
...flatUsages

View File

@@ -115,7 +115,8 @@ export const runToolWithPromptCall = async (
return {
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
completeMessages: concatMessages,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes,
@@ -131,7 +132,8 @@ export const runToolWithPromptCall = async (
},
{
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes
}
@@ -286,15 +288,20 @@ export const runToolWithPromptCall = async (
content: replaceAnswer
};
const completeMessages = filterMessages.concat(gptAssistantResponse);
const tokens = await countGptMessagesTokens(completeMessages, undefined);
// console.log(tokens, 'response token');
const inputTokens = await countGptMessagesTokens(requestMessages);
const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
// concat tool assistant
const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType;
return {
dispatchFlowResponse: response?.dispatchFlowResponse || [],
toolNodeTokens: response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens,
toolNodeInputTokens: response?.toolNodeInputTokens
? response.toolNodeInputTokens + inputTokens
: inputTokens,
toolNodeOutputTokens: response?.toolNodeOutputTokens
? response.toolNodeOutputTokens + outputTokens
: outputTokens,
completeMessages,
assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
runTimes: (response?.runTimes || 0) + 1
@@ -366,17 +373,9 @@ export const runToolWithPromptCall = async (
function_call: toolJson
};
/*
...
user
assistant: tool data
*/
const concatToolMessages = [
...requestMessages,
assistantToolMsgParams
] as ChatCompletionMessageParam[];
// Only toolCall tokens are counted here, Tool response tokens count towards the next reply
const tokens = await countGptMessagesTokens(concatToolMessages, undefined);
const inputTokens = await countGptMessagesTokens(requestMessages);
const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]);
/*
...
@@ -437,7 +436,12 @@ ANSWER: `;
}
const runTimes = (response?.runTimes || 0) + toolsRunResponse.toolResponse.runTimes;
const toolNodeTokens = response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens;
const toolNodeInputTokens = response?.toolNodeInputTokens
? response.toolNodeInputTokens + inputTokens
: inputTokens;
const toolNodeOutputTokens = response?.toolNodeOutputTokens
? response.toolNodeOutputTokens + outputTokens
: outputTokens;
// Check stop signal
const hasStopSignal = toolsRunResponse.toolResponse.flowResponses.some((item) => !!item.toolStop);
@@ -460,7 +464,8 @@ ANSWER: `;
return {
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
completeMessages: filterMessages,
assistantResponses: toolNodeAssistants,
runTimes,
@@ -475,7 +480,8 @@ ANSWER: `;
},
{
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
assistantResponses: toolNodeAssistants,
runTimes
}

View File

@@ -158,7 +158,8 @@ export const runToolWithToolChoice = async (
return {
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
completeMessages: requestMessages,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes,
@@ -176,7 +177,8 @@ export const runToolWithToolChoice = async (
},
{
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes
}
@@ -428,7 +430,9 @@ export const runToolWithToolChoice = async (
] as ChatCompletionMessageParam[];
// Only toolCall tokens are counted here, Tool response tokens count towards the next reply
const tokens = await countGptMessagesTokens(concatToolMessages, tools);
const inputTokens = await countGptMessagesTokens(requestMessages, tools);
const outputTokens = await countGptMessagesTokens(assistantToolMsgParams);
/*
...
user
@@ -463,7 +467,10 @@ export const runToolWithToolChoice = async (
const runTimes =
(response?.runTimes || 0) +
flatToolsResponseData.reduce((sum, item) => sum + item.runTimes, 0);
const toolNodeTokens = response ? response.toolNodeTokens + tokens : tokens;
const toolNodeInputTokens = response ? response.toolNodeInputTokens + inputTokens : inputTokens;
const toolNodeOutputTokens = response
? response.toolNodeOutputTokens + outputTokens
: outputTokens;
// Check stop signal
const hasStopSignal = flatToolsResponseData.some(
@@ -496,7 +503,8 @@ export const runToolWithToolChoice = async (
return {
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
completeMessages,
assistantResponses: toolNodeAssistants,
runTimes,
@@ -512,7 +520,8 @@ export const runToolWithToolChoice = async (
},
{
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
assistantResponses: toolNodeAssistants,
runTimes
}
@@ -524,14 +533,17 @@ export const runToolWithToolChoice = async (
content: answer
};
const completeMessages = filterMessages.concat(gptAssistantResponse);
const tokens = await countGptMessagesTokens(completeMessages, tools);
const inputTokens = await countGptMessagesTokens(requestMessages, tools);
const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
// concat tool assistant
const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType;
return {
dispatchFlowResponse: response?.dispatchFlowResponse || [],
toolNodeTokens: response ? response.toolNodeTokens + tokens : tokens,
toolNodeInputTokens: response ? response.toolNodeInputTokens + inputTokens : inputTokens,
toolNodeOutputTokens: response ? response.toolNodeOutputTokens + outputTokens : outputTokens,
completeMessages,
assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
runTimes: (response?.runTimes || 0) + 1
@@ -578,7 +590,8 @@ async function streamResponse({
text: content
})
});
} else if (responseChoice?.tool_calls?.[0]) {
}
if (responseChoice?.tool_calls?.[0]) {
const toolCall: ChatCompletionMessageToolCall = responseChoice.tool_calls[0];
// In a stream response, only one tool is returned at a time. If have id, description is executing a tool
if (toolCall.id || callingTool) {

View File

@@ -31,7 +31,9 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{
export type RunToolResponse = {
dispatchFlowResponse: DispatchFlowResponse[];
toolNodeTokens: number;
toolNodeTokens?: number; // deprecated
toolNodeInputTokens: number;
toolNodeOutputTokens: number;
completeMessages?: ChatCompletionMessageParam[];
assistantResponses?: AIChatItemValueItemType[];
toolWorkflowInteractiveResponse?: WorkflowInteractiveResponseType;

View File

@@ -5,13 +5,17 @@ import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
import { createChatCompletion } from '../../../ai/config';
import type { ChatCompletion, StreamChatType } from '@fastgpt/global/core/ai/type.d';
import type {
ChatCompletion,
ChatCompletionMessageParam,
StreamChatType
} from '@fastgpt/global/core/ai/type.d';
import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { postTextCensor } from '../../../../common/api/requestPlusApi';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import type { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
import { countMessagesTokens } from '../../../../common/string/tiktoken/index';
import { countGptMessagesTokens } from '../../../../common/string/tiktoken/index';
import {
chats2GPTMessages,
chatValue2RuntimePrompt,
@@ -214,16 +218,23 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
return Promise.reject(getEmptyResponseTip());
}
const completeMessages = requestMessages.concat({
const AIMessages: ChatCompletionMessageParam[] = [
{
role: ChatCompletionRequestMessageRoleEnum.Assistant,
content: answerText
});
}
];
const completeMessages = [...requestMessages, ...AIMessages];
const chatCompleteMessages = GPTMessages2Chats(completeMessages);
const tokens = await countMessagesTokens(chatCompleteMessages);
const inputTokens = await countGptMessagesTokens(requestMessages);
const outputTokens = await countGptMessagesTokens(AIMessages);
const { totalPoints, modelName } = formatModelChars2Points({
model,
tokens,
inputTokens,
outputTokens,
modelType: ModelTypeEnum.llm
});
@@ -232,7 +243,9 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
[DispatchNodeResponseKeyEnum.nodeResponse]: {
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
tokens,
tokens: inputTokens + outputTokens,
inputTokens: inputTokens,
outputTokens: outputTokens,
query: `${userChatInput}`,
maxToken: max_tokens,
historyPreview: getHistoryPreview(
@@ -247,7 +260,8 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
moduleName: name,
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
tokens
inputTokens: inputTokens,
outputTokens: outputTokens
}
],
[DispatchNodeResponseKeyEnum.toolResponses]: answerText,

View File

@@ -120,14 +120,14 @@ export async function dispatchDatasetSearch(
// vector
const { totalPoints, modelName } = formatModelChars2Points({
model: vectorModel.model,
tokens,
inputTokens: tokens,
modelType: ModelTypeEnum.vector
});
const responseData: DispatchNodeResponseType & { totalPoints: number } = {
totalPoints,
query: concatQueries.join('\n'),
model: modelName,
tokens,
inputTokens: tokens,
similarity: usingSimilarityFilter ? similarity : undefined,
limit,
searchMode,
@@ -139,19 +139,21 @@ export async function dispatchDatasetSearch(
totalPoints,
moduleName: node.name,
model: modelName,
tokens
inputTokens: tokens
}
];
if (aiExtensionResult) {
const { totalPoints, modelName } = formatModelChars2Points({
model: aiExtensionResult.model,
tokens: aiExtensionResult.tokens,
inputTokens: aiExtensionResult.inputTokens,
outputTokens: aiExtensionResult.outputTokens,
modelType: ModelTypeEnum.llm
});
responseData.totalPoints += totalPoints;
responseData.tokens = aiExtensionResult.tokens;
responseData.inputTokens = aiExtensionResult.inputTokens;
responseData.outputTokens = aiExtensionResult.outputTokens;
responseData.extensionModel = modelName;
responseData.extensionResult =
aiExtensionResult.extensionQueries?.join('\n') ||
@@ -161,7 +163,8 @@ export async function dispatchDatasetSearch(
totalPoints,
moduleName: 'core.module.template.Query extension',
model: modelName,
tokens: aiExtensionResult.tokens
inputTokens: aiExtensionResult.inputTokens,
outputTokens: aiExtensionResult.outputTokens
});
}

View File

@@ -130,8 +130,7 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
[DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
{
moduleName: plugin.name,
totalPoints: usagePoints,
tokens: 0
totalPoints: usagePoints
}
],
[DispatchNodeResponseKeyEnum.toolResponses]: output?.pluginOutput

View File

@@ -153,8 +153,7 @@ export const dispatchRunAppNode = async (props: Props): Promise<Response> => {
[DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
{
moduleName: appData.name,
totalPoints: usagePoints,
tokens: 0
totalPoints: usagePoints
}
],
[DispatchNodeResponseKeyEnum.toolResponses]: text,

View File

@@ -31,7 +31,7 @@ export const dispatchQueryExtension = async ({
const queryExtensionModel = getLLMModel(model);
const chatHistories = getHistories(history, histories);
const { extensionQueries, tokens } = await queryExtension({
const { extensionQueries, inputTokens, outputTokens } = await queryExtension({
chatBg: systemPrompt,
query: userChatInput,
histories: chatHistories,
@@ -42,7 +42,8 @@ export const dispatchQueryExtension = async ({
const { totalPoints, modelName } = formatModelChars2Points({
model: queryExtensionModel.model,
tokens,
inputTokens,
outputTokens,
modelType: ModelTypeEnum.llm
});
@@ -59,7 +60,8 @@ export const dispatchQueryExtension = async ({
[DispatchNodeResponseKeyEnum.nodeResponse]: {
totalPoints,
model: modelName,
tokens,
inputTokens,
outputTokens,
query: userChatInput,
textOutput: JSON.stringify(filterSameQueries)
},
@@ -68,7 +70,8 @@ export const dispatchQueryExtension = async ({
moduleName: node.name,
totalPoints,
model: modelName,
tokens
inputTokens,
outputTokens
}
],
[NodeOutputKeyEnum.text]: JSON.stringify(filterSameQueries)

View File

@@ -31,20 +31,23 @@ export const createTrainingUsage = async ({
{
moduleName: 'support.wallet.moduleName.index',
model: vectorModel,
tokens: 0,
amount: 0
amount: 0,
inputTokens: 0,
outputTokens: 0
},
{
moduleName: 'support.wallet.moduleName.qa',
model: agentModel,
tokens: 0,
amount: 0
amount: 0,
inputTokens: 0,
outputTokens: 0
},
{
moduleName: 'core.dataset.training.Auto mode',
model: agentModel,
tokens: 0,
amount: 0
amount: 0,
inputTokens: 0,
outputTokens: 0
}
]
}

View File

@@ -1,7 +1,7 @@
import { connectionMongo, getMongoModel, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
const { Schema } = connectionMongo;
import { UsageSchemaType } from '@fastgpt/global/support/wallet/usage/type';
import { UsageSourceMap } from '@fastgpt/global/support/wallet/usage/constants';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import {
TeamCollectionName,
TeamMemberCollectionName
@@ -22,7 +22,7 @@ const UsageSchema = new Schema({
},
source: {
type: String,
enum: Object.keys(UsageSourceMap),
enum: Object.values(UsageSourceEnum),
required: true
},
appName: {
@@ -65,7 +65,7 @@ try {
// timer task. clear dead team
// UsageSchema.index({ teamId: 1, time: -1 }, { background: true });
UsageSchema.index({ time: 1 }, { background: true, expireAfterSeconds: 720 * 24 * 60 * 60 });
UsageSchema.index({ time: 1 }, { background: true, expireAfterSeconds: 360 * 24 * 60 * 60 });
} catch (error) {
console.log(error);
}

View File

@@ -1,17 +1,20 @@
import { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { ModelTypeEnum, getModelMap } from '../../../core/ai/model';
export const formatModelChars2Points = ({
model,
tokens = 0,
inputTokens = 0,
outputTokens = 0,
modelType,
multiple = 1000
}: {
model: string;
tokens: number;
inputTokens?: number;
outputTokens?: number;
modelType: `${ModelTypeEnum}`;
multiple?: number;
}) => {
const modelData = getModelMap?.[modelType]?.(model);
const modelData = getModelMap?.[modelType]?.(model) as LLMModelItemType;
if (!modelData) {
return {
totalPoints: 0,
@@ -19,7 +22,12 @@ export const formatModelChars2Points = ({
};
}
const totalPoints = (modelData.charsPointsPrice || 0) * (tokens / multiple);
const isIOPriceType = typeof modelData.inputPrice === 'number';
const totalPoints = isIOPriceType
? (modelData.inputPrice || 0) * (inputTokens / multiple) +
(modelData.outputPrice || 0) * (outputTokens / multiple)
: (modelData.charsPointsPrice || 0) * ((inputTokens + outputTokens) / multiple);
return {
modelName: modelData.name,
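A worked example of the branch above, with illustrative numbers:

```ts
// Worked example (illustrative values) of the two pricing branches:
const multiple = 1000;
// IO pricing: inputPrice = 1, outputPrice = 2, 1500 input + 500 output tokens
const ioPoints = 1 * (1500 / multiple) + 2 * (500 / multiple); // 2.5 points
// Single-rate pricing: charsPointsPrice = 1, same 2000 total tokens
const flatPoints = 1 * ((1500 + 500) / multiple); // 2 points
```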

View File

@@ -18,6 +18,7 @@ declare global {
var subPlans: SubPlanType | undefined;
var llmModels: LLMModelItemType[];
var llmModelPriceType: 'IO' | 'Tokens';
var vectorModels: VectorModelItemType[];
var audioSpeechModels: AudioSpeechModelType[];
var whisperModel: STTModelType;

View File

@@ -6,12 +6,14 @@
"details": "Details",
"duration_seconds": "Duration (seconds)",
"generation_time": "Generation time",
"input_token_length": "input tokens",
"member": "member",
"member_name": "Member name",
"module_name": "module name",
"month": "moon",
"no_usage_records": "No usage record yet",
"order_number": "Order number",
"output_token_length": "output tokens",
"project_name": "Project name",
"source": "source",
"text_length": "text length",

View File

@@ -447,6 +447,8 @@
"core.chat.response.Extension model": "Question Optimization Model",
"core.chat.response.Read complete response": "View Details",
"core.chat.response.Read complete response tips": "Click to View Detailed Process",
"core.chat.response.Tool call input tokens": "Tool Call Input Tokens Consumption",
"core.chat.response.Tool call output tokens": "Tool Call Output Tokens Consumption",
"core.chat.response.Tool call tokens": "Tool Call Tokens Consumption",
"core.chat.response.context total length": "Total Context Length",
"core.chat.response.loop_input": "Loop Input Array",
@@ -460,10 +462,12 @@
"core.chat.response.module historyPreview": "History Preview (Only Partial Content Displayed)",
"core.chat.response.module http result": "Response Body",
"core.chat.response.module if else Result": "Condition Result",
"core.chat.response.module input tokens": "input tokens",
"core.chat.response.module limit": "Single Search Limit",
"core.chat.response.module maxToken": "Max Response Tokens",
"core.chat.response.module model": "Model",
"core.chat.response.module name": "Model Name",
"core.chat.response.module output tokens": "output tokens",
"core.chat.response.module query": "Question/Search Term",
"core.chat.response.module quoteList": "Quote Content",
"core.chat.response.module similarity": "Similarity",
@@ -1043,6 +1047,8 @@
"support.user.team.Team Tags Async Success": "Sync Completed",
"support.user.team.member": "Member",
"support.wallet.Ai point every thousand tokens": "{{points}} Points/1K Tokens",
"support.wallet.Ai point every thousand tokens_input": "Input{{points}} points/1K tokens",
"support.wallet.Ai point every thousand tokens_output": "Output{{points}} points/1K tokens",
"support.wallet.Amount": "Amount",
"support.wallet.Buy": "Buy",
"support.wallet.Not sufficient": "Insufficient AI Points, Please Upgrade Your Package or Purchase Additional AI Points to Continue Using.",

View File

@@ -1,23 +1,25 @@
{
"usage_detail": "使用详情",
"order_number": "订单号",
"generation_time": "生成时间",
"month": "月",
"app_name": "应用名",
"source": "来源",
"total_points_consumed": "AI 积分消耗",
"billing_module": "扣费模块",
"module_name": "模块名",
"ai_model": "AI 模型",
"token_length": "token 长度",
"text_length": "文本长度",
"duration_seconds": "时长(秒)",
"all": "所有",
"app_name": "应用名",
"billing_module": "扣费模块",
"details": "详情",
"duration_seconds": "时长(秒)",
"generation_time": "生成时间",
"input_token_length": "输入 tokens",
"member": "成员",
"member_name": "成员名",
"user_type": "类型",
"module_name": "模块名",
"month": "月",
"no_usage_records": "暂无使用记录",
"order_number": "订单号",
"output_token_length": "输出 tokens",
"project_name": "项目名",
"source": "来源",
"text_length": "文本长度",
"token_length": "token 长度",
"total_points": "AI 积分消耗",
"details": "详情",
"no_usage_records": "暂无使用记录"
"total_points_consumed": "AI 积分消耗",
"usage_detail": "使用详情",
"user_type": "类型"
}

View File

@@ -450,6 +450,8 @@
"core.chat.response.Extension model": "问题优化模型",
"core.chat.response.Read complete response": "查看详情",
"core.chat.response.Read complete response tips": "点击查看详细流程",
"core.chat.response.Tool call input tokens": "工具调用输入 Tokens",
"core.chat.response.Tool call output tokens": "工具调用输出 Tokens",
"core.chat.response.Tool call tokens": "工具调用 tokens 消耗",
"core.chat.response.context total length": "上下文总长度",
"core.chat.response.loop_input": "输入数组",
@@ -463,10 +465,12 @@
"core.chat.response.module historyPreview": "记录预览(仅展示部分内容)",
"core.chat.response.module http result": "响应体",
"core.chat.response.module if else Result": "判断器结果",
"core.chat.response.module input tokens": "输入 Tokens",
"core.chat.response.module limit": "单次搜索上限",
"core.chat.response.module maxToken": "最大响应 tokens",
"core.chat.response.module model": "模型",
"core.chat.response.module name": "模型名",
"core.chat.response.module output tokens": "输出 Tokens",
"core.chat.response.module query": "问题/检索词",
"core.chat.response.module quoteList": "引用内容",
"core.chat.response.module similarity": "相似度",
@@ -1046,6 +1050,8 @@
"support.user.team.Team Tags Async Success": "同步完成",
"support.user.team.member": "成员",
"support.wallet.Ai point every thousand tokens": "{{points}} 积分/1K tokens",
"support.wallet.Ai point every thousand tokens_input": "输入:{{points}} 积分/1K tokens",
"support.wallet.Ai point every thousand tokens_output": "输出:{{points}} 积分/1K tokens",
"support.wallet.Amount": "金额",
"support.wallet.Buy": "购买",
"support.wallet.Not sufficient": "您的 AI 积分不足,请先升级套餐或购买额外 AI 积分后继续使用。",

View File

@@ -6,12 +6,14 @@
"details": "詳情",
"duration_seconds": "時長(秒)",
"generation_time": "生成時間",
"input_token_length": "輸入 tokens",
"member": "成員",
"member_name": "成員名",
"module_name": "模組名",
"month": "月",
"no_usage_records": "暫無使用紀錄",
"order_number": "訂單編號",
"output_token_length": "輸出 tokens",
"project_name": "專案名",
"source": "來源",
"text_length": "文字長度",

View File

@@ -447,6 +447,8 @@
"core.chat.response.Extension model": "問題最佳化模型",
"core.chat.response.Read complete response": "檢視詳細資料",
"core.chat.response.Read complete response tips": "點選檢視詳細流程",
"core.chat.response.Tool call input tokens": "工具呼叫輸入 Token 消耗",
"core.chat.response.Tool call output tokens": "工具呼叫輸出 Token 消耗",
"core.chat.response.Tool call tokens": "工具呼叫 Token 消耗",
"core.chat.response.context total length": "上下文總長度",
"core.chat.response.loop_input": "輸入陣列",
@@ -460,10 +462,12 @@
"core.chat.response.module historyPreview": "記錄預覽(僅顯示部分內容)",
"core.chat.response.module http result": "回應內容",
"core.chat.response.module if else Result": "條件判斷結果",
"core.chat.response.module input tokens": "輸入 tokens",
"core.chat.response.module limit": "單次搜尋上限",
"core.chat.response.module maxToken": "最大回應 Token 數",
"core.chat.response.module model": "模型",
"core.chat.response.module name": "模型名稱",
"core.chat.response.module output tokens": "輸出 tokens",
"core.chat.response.module query": "問題/搜尋詞",
"core.chat.response.module quoteList": "引用內容",
"core.chat.response.module similarity": "相似度",
@@ -1043,6 +1047,8 @@
"support.user.team.Team Tags Async Success": "同步完成",
"support.user.team.member": "成員",
"support.wallet.Ai point every thousand tokens": "{{points}} 點數/1K tokens",
"support.wallet.Ai point every thousand tokens_input": "輸入:{{points}} 积分/1K tokens",
"support.wallet.Ai point every thousand tokens_output": "輸出:{{points}} 积分/1K tokens",
"support.wallet.Amount": "金額",
"support.wallet.Buy": "購買",
"support.wallet.Not sufficient": "您的 AI 點數不足,請先升級方案或購買額外 AI 點數後繼續使用。",

View File

@@ -18,7 +18,6 @@ import {
Thead,
Tr,
Table,
useDisclosure,
FlexProps
} from '@chakra-ui/react';
import { useSystemStore } from '@/web/common/system/useSystemStore';
@@ -175,10 +174,28 @@ const AIChatSettingsModal = ({
<Tbody>
<Tr color={'myGray.900'}>
<Td pt={0} pb={2}>
{typeof selectedModel?.inputPrice === 'number' ? (
<>
<Box>
{t('common:support.wallet.Ai point every thousand tokens_input', {
points: selectedModel?.inputPrice || 0
})}
</Box>
<Box>
{t('common:support.wallet.Ai point every thousand tokens_output', {
points: selectedModel?.outputPrice || 0
})}
</Box>
</>
) : (
<>
{t('common:support.wallet.Ai point every thousand tokens', {
points: selectedModel?.charsPointsPrice || 0
})}
</>
)}
</Td>
<Td pt={0} pb={2}>
{Math.round((selectedModel?.maxContext || 4096) / 1000)}K
</Td>

View File

@@ -60,7 +60,25 @@ const ModelTable = () => {
const formatLLMModelList = llmModelList.map((item) => ({
...item,
typeLabel: t('common:model.type.chat'),
priceLabel: (
priceLabel:
typeof item.inputPrice === 'number' ? (
<Box>
<Flex>
{`${t('common:common.Input')}:`}
<Box fontWeight={'bold'} color={'myGray.900'} mr={0.5} ml={2}>
{item.inputPrice || 0}
</Box>
{`${t('common:support.wallet.subscription.point')} / 1K Tokens`}
</Flex>
<Flex>
{`${t('common:common.Output')}:`}
<Box fontWeight={'bold'} color={'myGray.900'} mr={0.5} ml={2}>
{item.outputPrice || 0}
</Box>
{`${t('common:support.wallet.subscription.point')} / 1K Tokens`}
</Flex>
</Box>
) : (
<Flex color={'myGray.700'}>
<Box fontWeight={'bold'} color={'myGray.900'} mr={0.5}>
{item.charsPointsPrice}
@@ -149,13 +167,13 @@ const ModelTable = () => {
return filterList;
}, [
provider,
modelType,
llmModelList,
vectorModelList,
audioSpeechModelList,
whisperModel,
t,
modelType,
provider,
search
]);

View File

@@ -155,10 +155,26 @@ export const WholeResponseContent = ({
label={t('common:core.chat.response.module tokens')}
value={`${activeModule?.tokens}`}
/>
<Row
label={t('common:core.chat.response.module input tokens')}
value={`${activeModule?.inputTokens}`}
/>
<Row
label={t('common:core.chat.response.module output tokens')}
value={`${activeModule?.outputTokens}`}
/>
<Row
label={t('common:core.chat.response.Tool call tokens')}
value={`${activeModule?.toolCallTokens}`}
/>
<Row
label={t('common:core.chat.response.Tool call input tokens')}
value={`${activeModule?.toolCallInputTokens}`}
/>
<Row
label={t('common:core.chat.response.Tool call output tokens')}
value={`${activeModule?.toolCallOutputTokens}`}
/>
<Row label={t('common:core.chat.response.module query')} value={activeModule?.query} />
<Row

View File

@@ -26,9 +26,12 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
[usage.list]
);
const { hasModel, hasToken, hasCharsLen, hasDuration } = useMemo(() => {
const { hasModel, hasToken, hasInputToken, hasOutputToken, hasCharsLen, hasDuration } =
useMemo(() => {
let hasModel = false;
let hasToken = false;
let hasInputToken = false;
let hasOutputToken = false;
let hasCharsLen = false;
let hasDuration = false;
let hasDataLen = false;
@@ -41,6 +44,12 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
if (typeof item.tokens === 'number') {
hasToken = true;
}
if (typeof item.inputTokens === 'number') {
hasInputToken = true;
}
if (typeof item.outputTokens === 'number') {
hasOutputToken = true;
}
if (typeof item.charsLength === 'number') {
hasCharsLen = true;
}
@@ -52,6 +61,8 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
return {
hasModel,
hasToken,
hasInputToken,
hasOutputToken,
hasCharsLen,
hasDuration,
hasDataLen
@@ -98,6 +109,8 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
<Th>{t('account_usage:module_name')}</Th>
{hasModel && <Th>{t('account_usage:ai_model')}</Th>}
{hasToken && <Th>{t('account_usage:token_length')}</Th>}
{hasInputToken && <Th>{t('account_usage:input_token_length')}</Th>}
{hasOutputToken && <Th>{t('account_usage:output_token_length')}</Th>}
{hasCharsLen && <Th>{t('account_usage:text_length')}</Th>}
{hasDuration && <Th>{t('account_usage:duration_seconds')}</Th>}
<Th>{t('account_usage:total_points_consumed')}</Th>
@@ -109,6 +122,8 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
<Td>{t(item.moduleName as any)}</Td>
{hasModel && <Td>{item.model ?? '-'}</Td>}
{hasToken && <Td>{item.tokens ?? '-'}</Td>}
{hasInputToken && <Td>{item.inputTokens ?? '-'}</Td>}
{hasOutputToken && <Td>{item.outputTokens ?? '-'}</Td>}
{hasCharsLen && <Td>{item.charsLength ?? '-'}</Td>}
{hasDuration && <Td>{item.duration ?? '-'}</Td>}
<Td>{formatNumber(item.amount)}</Td>

View File

@@ -37,7 +37,7 @@ async function handler(
const qgModel = global.llmModels[0];
const { result, tokens } = await createQuestionGuide({
const { result, inputTokens, outputTokens } = await createQuestionGuide({
messages,
model: qgModel.model
});
@@ -47,7 +47,8 @@ async function handler(
});
pushQuestionGuideUsage({
tokens,
inputTokens,
outputTokens,
teamId,
tmbId
});

View File

@@ -52,14 +52,15 @@ async function handler(req: ApiRequestProps<CreateQuestionGuideParams>, res: Nex
const qgModel = questionGuide?.model || global.llmModels[0].model;
const { result, tokens } = await createQuestionGuide({
const { result, inputTokens, outputTokens } = await createQuestionGuide({
messages,
model: qgModel,
customPrompt: questionGuide?.customPrompt
});
pushQuestionGuideUsage({
tokens,
inputTokens,
outputTokens,
teamId,
tmbId
});

View File

@@ -89,7 +89,7 @@ async function handler(req: NextApiRequest) {
pushGenerateVectorUsage({
teamId,
tmbId,
tokens,
inputTokens: tokens,
model: vectorModelData.model
});

View File

@@ -36,7 +36,7 @@ async function handler(req: ApiRequestProps<UpdateDatasetDataProps>) {
pushGenerateVectorUsage({
teamId,
tmbId,
tokens,
inputTokens: tokens,
model: vectorModel
});
} else {

View File

@@ -74,14 +74,15 @@ async function handler(req: NextApiRequest) {
const { totalPoints } = pushGenerateVectorUsage({
teamId,
tmbId,
tokens,
inputTokens: tokens,
model: dataset.vectorModel,
source: apikey ? UsageSourceEnum.api : UsageSourceEnum.fastgpt,
...(aiExtensionResult &&
extensionModel && {
extensionModel: extensionModel.name,
extensionTokens: aiExtensionResult.tokens
extensionInputTokens: aiExtensionResult.inputTokens,
extensionOutputTokens: aiExtensionResult.outputTokens
})
});
if (apikey) {

View File

@@ -57,7 +57,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { totalPoints } = pushGenerateVectorUsage({
teamId,
tmbId,
tokens,
inputTokens: tokens,
model,
billId,
source: getUsageSourceByAuthType({ authType })

View File

@@ -196,7 +196,7 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
chunkSize: vectorModel?.defaultToken ? vectorModel?.defaultToken * 2 : 1024,
showChunkInput: false,
showPromptInput: false,
charsPointsPrice: agentModel.charsPointsPrice,
charsPointsPrice: agentModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
price: agentModel.charsPointsPrice
}),
@@ -211,7 +211,7 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
chunkSize: embeddingChunkSize,
showChunkInput: true,
showPromptInput: false,
charsPointsPrice: vectorModel.charsPointsPrice,
charsPointsPrice: vectorModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Embedding Estimated Price Tips', {
price: vectorModel.charsPointsPrice
}),
@@ -226,7 +226,7 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
chunkSize: qaChunkSize,
showChunkInput: true,
showPromptInput: true,
charsPointsPrice: agentModel.charsPointsPrice,
charsPointsPrice: agentModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
price: agentModel.charsPointsPrice
}),

View File

@@ -12,7 +12,10 @@ import { getLLMModel } from '@fastgpt/service/core/ai/model';
import { checkTeamAiPointsAndLock } from './utils';
import { checkInvalidChunkAndLock } from '@fastgpt/service/core/dataset/training/utils';
import { addMinutes } from 'date-fns';
import { countGptMessagesTokens } from '@fastgpt/service/common/string/tiktoken/index';
import {
countGptMessagesTokens,
countPromptTokens
} from '@fastgpt/service/common/string/tiktoken/index';
import { pushDataListToTrainingQueueByCollectionId } from '@fastgpt/service/core/dataset/training/controller';
import { loadRequestMessages } from '@fastgpt/service/core/chat/utils';
import { llmCompletionsBodyFormat } from '@fastgpt/service/core/ai/utils';
@@ -153,7 +156,8 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
pushQAUsage({
teamId: data.teamId,
tmbId: data.tmbId,
tokens: await countGptMessagesTokens(messages),
inputTokens: await countGptMessagesTokens(messages),
outputTokens: await countPromptTokens(answer),
billId: data.billId,
model: modelData.model
});

View File

@@ -111,7 +111,7 @@ export async function generateVector(): Promise<any> {
pushGenerateVectorUsage({
teamId: data.teamId,
tmbId: data.tmbId,
tokens,
inputTokens: tokens,
model: data.model,
billId: data.billId
});

View File

@@ -37,7 +37,8 @@ export const pushChatUsage = ({
moduleName: item.moduleName,
amount: item.totalPoints || 0,
model: item.model,
tokens: item.tokens
inputTokens: item.inputTokens,
outputTokens: item.outputTokens
}))
});
addLog.info(`finish completions`, {
@@ -52,20 +53,23 @@ export const pushQAUsage = async ({
teamId,
tmbId,
model,
tokens,
inputTokens,
outputTokens,
billId
}: {
teamId: string;
tmbId: string;
model: string;
tokens: number;
inputTokens: number;
outputTokens: number;
billId: string;
}) => {
// Calculate the price
const { totalPoints } = formatModelChars2Points({
model,
modelType: ModelTypeEnum.llm,
tokens
inputTokens,
outputTokens
});
concatUsage({
@@ -73,7 +77,8 @@ export const pushQAUsage = async ({
teamId,
tmbId,
totalPoints,
tokens,
inputTokens,
outputTokens,
listIndex: 1
});
@@ -84,30 +89,32 @@ export const pushGenerateVectorUsage = ({
billId,
teamId,
tmbId,
tokens,
inputTokens,
model,
source = UsageSourceEnum.fastgpt,
extensionModel,
extensionTokens
extensionInputTokens,
extensionOutputTokens
}: {
billId?: string;
teamId: string;
tmbId: string;
tokens: number;
inputTokens: number;
model: string;
source?: UsageSourceEnum;
extensionModel?: string;
extensionTokens?: number;
extensionInputTokens?: number;
extensionOutputTokens?: number;
}) => {
const { totalPoints: totalVector, modelName: vectorModelName } = formatModelChars2Points({
modelType: ModelTypeEnum.vector,
model,
tokens
inputTokens
});
const { extensionTotalPoints, extensionModelName } = (() => {
if (!extensionModel || !extensionTokens)
if (!extensionModel || !extensionInputTokens)
return {
extensionTotalPoints: 0,
extensionModelName: ''
@@ -115,7 +122,8 @@ export const pushGenerateVectorUsage = ({
const { totalPoints, modelName } = formatModelChars2Points({
modelType: ModelTypeEnum.llm,
model: extensionModel,
tokens: extensionTokens
inputTokens: extensionInputTokens,
outputTokens: extensionOutputTokens
});
return {
extensionTotalPoints: totalPoints,
@@ -132,7 +140,7 @@ export const pushGenerateVectorUsage = ({
tmbId,
totalPoints,
billId,
tokens,
inputTokens,
listIndex: 0
});
} else {
@@ -147,7 +155,7 @@ export const pushGenerateVectorUsage = ({
moduleName: 'support.wallet.moduleName.index',
amount: totalVector,
model: vectorModelName,
tokens
inputTokens
},
...(extensionModel !== undefined
? [
@@ -155,7 +163,8 @@ export const pushGenerateVectorUsage = ({
moduleName: 'core.module.template.Query extension',
amount: extensionTotalPoints,
model: extensionModelName,
tokens: extensionTokens
inputTokens: extensionInputTokens,
outputTokens: extensionOutputTokens
}
]
: [])
@@ -166,17 +175,20 @@ export const pushGenerateVectorUsage = ({
};
export const pushQuestionGuideUsage = ({
tokens,
inputTokens,
outputTokens,
teamId,
tmbId
}: {
tokens: number;
inputTokens: number;
outputTokens: number;
teamId: string;
tmbId: string;
}) => {
const qgModel = global.llmModels[0];
const { totalPoints, modelName } = formatModelChars2Points({
tokens,
inputTokens,
outputTokens,
model: qgModel.model,
modelType: ModelTypeEnum.llm
});
@@ -192,7 +204,8 @@ export const pushQuestionGuideUsage = ({
moduleName: 'core.app.Question Guide',
amount: totalPoints,
model: modelName,
tokens
inputTokens,
outputTokens
}
]
});
@@ -215,7 +228,7 @@ export function pushAudioSpeechUsage({
}) {
const { totalPoints, modelName } = formatModelChars2Points({
model,
tokens: charsLength,
inputTokens: charsLength,
modelType: ModelTypeEnum.audioSpeech
});
@@ -251,7 +264,7 @@ export function pushWhisperUsage({
const { totalPoints, modelName } = formatModelChars2Points({
model: whisperModel.model,
tokens: duration,
inputTokens: duration,
modelType: ModelTypeEnum.whisper,
multiple: 60
});