Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-23 21:13:50 +00:00)
feat: get tokens from api usage (#4671)
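This change sources token counts from the model API's `usage` object instead of always counting them locally. The LLM test endpoint and the QA generation queue now read both the answer text and the usage through a shared `llmResponseToAnswerText` helper, falling back to local tiktoken counting only when the API omits usage, and the chat response detail panel renders its token rows conditionally as combined `Input/Output` values.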
@@ -154,30 +154,21 @@ export const WholeResponseContent = ({
           value={`${activeModule?.runningTime || 0}s`}
         />
         <Row label={t('common:core.chat.response.module model')} value={activeModule?.model} />
-        <Row
-          label={t('common:core.chat.response.module tokens')}
-          value={`${activeModule?.tokens}`}
-        />
-        <Row
-          label={t('common:core.chat.response.module input tokens')}
-          value={`${activeModule?.inputTokens}`}
-        />
-        <Row
-          label={t('common:core.chat.response.module output tokens')}
-          value={`${activeModule?.outputTokens}`}
-        />
-        <Row
-          label={t('common:core.chat.response.Tool call tokens')}
-          value={`${activeModule?.toolCallTokens}`}
-        />
-        <Row
-          label={t('common:core.chat.response.Tool call input tokens')}
-          value={`${activeModule?.toolCallInputTokens}`}
-        />
-        <Row
-          label={t('common:core.chat.response.Tool call output tokens')}
-          value={`${activeModule?.toolCallOutputTokens}`}
-        />
+        {activeModule?.tokens && (
+          <Row label={t('chat:llm_tokens')} value={`${activeModule?.tokens}`} />
+        )}
+        {(!!activeModule?.inputTokens || !!activeModule?.outputTokens) && (
+          <Row
+            label={t('chat:llm_tokens')}
+            value={`Input/Output = ${activeModule?.inputTokens || 0}/${activeModule?.outputTokens || 0}`}
+          />
+        )}
+        {(!!activeModule?.toolCallInputTokens || !!activeModule?.toolCallOutputTokens) && (
+          <Row
+            label={t('common:core.chat.response.Tool call tokens')}
+            value={`Input/Output = ${activeModule?.toolCallInputTokens || 0}/${activeModule?.toolCallOutputTokens || 0}`}
+          />
+        )}
 
         <Row label={t('common:core.chat.response.module query')} value={activeModule?.query} />
         <Row
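The rows above now render only when the corresponding counts exist: records that only carry a legacy combined `tokens` value keep a single row, while newer records show `Input/Output` pairs for the LLM call and for tool calls. A minimal sketch of the token-related fields the component reads from `activeModule` — field names are taken from the JSX above, while the types and comments are assumptions (the real type is FastGPT's per-module response item):

interface ModuleTokenFields {
  runningTime?: number; // seconds
  model?: string;
  tokens?: number; // legacy combined count, presumably kept for older records
  inputTokens?: number; // prompt tokens, now taken from API usage when available
  outputTokens?: number; // completion tokens, likewise
  toolCallInputTokens?: number;
  toolCallOutputTokens?: number;
  query?: string;
}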
@@ -16,7 +16,7 @@ import { reRankRecall } from '@fastgpt/service/core/ai/rerank';
 import { aiTranscriptions } from '@fastgpt/service/core/ai/audio/transcriptions';
 import { isProduction } from '@fastgpt/global/common/system/constants';
 import * as fs from 'fs';
-import { llmCompletionsBodyFormat } from '@fastgpt/service/core/ai/utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '@fastgpt/service/core/ai/utils';
 
 export type testQuery = { model: string; channelId?: number };
 
@@ -88,23 +88,10 @@ const testLLMModel = async (model: LLMModelItemType, headers: Record<string, str
       }
     }
   });
+  const { text: answer } = await llmResponseToAnswerText(response);
 
-  if (isStreamResponse) {
-    for await (const part of response) {
-      const content = part.choices?.[0]?.delta?.content || '';
-      // @ts-ignore
-      const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
-      if (content || reasoningContent) {
-        response?.controller?.abort();
-        return;
-      }
-    }
-  } else {
-    addLog.info(`Model not stream response`);
-    const answer = response.choices?.[0]?.message?.content || '';
-    if (answer) {
-      return answer;
-    }
-  }
+  if (answer) {
+    return answer;
+  }
 
   return Promise.reject('Model response empty');
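The hunks above lean on `llmResponseToAnswerText`, which the diff imports from `@fastgpt/service/core/ai/utils` but never shows. Based on the stream and non-stream branches it replaces, here is a minimal sketch of the contract — the type names and the usage-on-final-chunk detail are assumptions, not FastGPT's actual implementation:

type CompletionUsage = { prompt_tokens?: number; completion_tokens?: number };

type StreamResponse = AsyncIterable<{
  choices?: { delta?: { content?: string | null } }[];
  usage?: CompletionUsage | null;
}>;

type PlainResponse = {
  choices?: { message?: { content?: string | null } }[];
  usage?: CompletionUsage | null;
};

// Hedged sketch: accept either response shape, concatenate the answer text,
// and surface the provider-reported usage when present.
export async function llmResponseToAnswerText(
  response: StreamResponse | PlainResponse
): Promise<{ text: string; usage?: CompletionUsage }> {
  if (Symbol.asyncIterator in response) {
    // Streaming: walk the delta chunks; some providers attach usage
    // only to the final chunk.
    let text = '';
    let usage: CompletionUsage | undefined;
    for await (const part of response as StreamResponse) {
      text += part.choices?.[0]?.delta?.content || '';
      usage = part.usage ?? usage;
    }
    return { text, usage };
  }
  // Non-stream: the full message and usage sit on the completion object.
  const plain = response as PlainResponse;
  return {
    text: plain.choices?.[0]?.message?.content || '',
    usage: plain.usage ?? undefined
  };
}

The point of returning `usage` alongside the text is that callers can prefer the provider's own token accounting and fall back to tiktoken only when it is absent, which is exactly what the QA queue hunks below do.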
@@ -2,7 +2,7 @@ import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/sch
 import { pushQAUsage } from '@/service/support/wallet/usage/push';
 import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
 import { createChatCompletion } from '@fastgpt/service/core/ai/config';
-import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d';
+import type { ChatCompletionMessageParam, StreamChatType } from '@fastgpt/global/core/ai/type.d';
 import { addLog } from '@fastgpt/service/common/system/log';
 import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
 import { replaceVariable } from '@fastgpt/global/common/string/tools';
@@ -17,10 +17,7 @@ import {
 } from '@fastgpt/service/common/string/tiktoken/index';
 import { pushDataListToTrainingQueueByCollectionId } from '@fastgpt/service/core/dataset/training/controller';
 import { loadRequestMessages } from '@fastgpt/service/core/chat/utils';
-import {
-  llmCompletionsBodyFormat,
-  llmStreamResponseToAnswerText
-} from '@fastgpt/service/core/ai/utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '@fastgpt/service/core/ai/utils';
 import { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
 import {
   chunkAutoChunkSize,
@@ -143,7 +140,9 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
       modelData
     )
   });
-  const answer = await llmStreamResponseToAnswerText(chatResponse);
+  const { text: answer, usage } = await llmResponseToAnswerText(chatResponse);
+  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(messages));
+  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));
 
   const qaArr = formatSplitText({ answer, rawText: text, llmModel: modelData }); // formatted QA pairs
 
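Note that the fallbacks use `||` rather than `??`: a missing `usage` object and a reported count of `0` are treated the same, and both trigger the local tiktoken estimate via `countGptMessagesTokens` / `countPromptTokens`.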
@@ -167,15 +166,15 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
     pushQAUsage({
       teamId: data.teamId,
       tmbId: data.tmbId,
-      inputTokens: await countGptMessagesTokens(messages),
-      outputTokens: await countPromptTokens(answer),
+      inputTokens,
+      outputTokens,
       billId: data.billId,
       model: modelData.model
     });
     addLog.info(`[QA Queue] Finish`, {
       time: Date.now() - startTime,
       splitLength: qaArr.length,
-      usage: chatResponse.usage
+      usage
     });
 
     return reduceQueueAndReturn();