feat: get tokens from api usage (#4671)

Author: Archer
Date: 2025-04-27 01:13:38 +08:00
Committed by: GitHub
Commit: 1465999c46
Parent: 0720bbe4da

26 changed files with 270 additions and 223 deletions
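
Across the changed call sites the commit applies one pattern: read token counts from the completion API's usage object when the provider returns one, and fall back to local tiktoken counting otherwise. A minimal sketch of that pattern in isolation (the helper and its name are illustrative, not part of the commit; only the `usage?.prompt_tokens || ...` fallback comes from the diffs below):

import type { CompletionUsage } from '@fastgpt/global/core/ai/type';

// Illustrative helper (not in the commit): prefer provider-reported usage and
// fall back to local tiktoken counting when usage is missing or reported as 0.
export const resolveTokenUsage = async (
  usage: CompletionUsage | undefined,
  countInputTokens: () => Promise<number>, // e.g. () => countGptMessagesTokens(requestMessages)
  countOutputTokens: () => Promise<number> // e.g. () => countPromptTokens(answer)
) => ({
  inputTokens: usage?.prompt_tokens || (await countInputTokens()),
  outputTokens: usage?.completion_tokens || (await countOutputTokens())
});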

View File

@@ -1,5 +1,5 @@
 import OpenAI from '@fastgpt/global/core/ai';
-import {
+import type {
   ChatCompletionCreateParamsNonStreaming,
   ChatCompletionCreateParamsStreaming,
   StreamChatType,

View File

@@ -2,7 +2,7 @@ import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d'
 import { createChatCompletion } from '../config';
 import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
 import { loadRequestMessages } from '../../chat/utils';
-import { llmCompletionsBodyFormat } from '../utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils';
 import {
   QuestionGuidePrompt,
   QuestionGuideFooterPrompt
@@ -35,7 +35,7 @@ export async function createQuestionGuide({
     useVision: false
   });
-  const { response: data } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         model,
@@ -47,21 +47,20 @@ export async function createQuestionGuide({
       model
     )
   });
-  const answer = data.choices?.[0]?.message?.content || '';
+  const { text: answer, usage } = await llmResponseToAnswerText(response);
   const start = answer.indexOf('[');
   const end = answer.lastIndexOf(']');
-  const inputTokens = await countGptMessagesTokens(requestMessages);
-  const outputTokens = await countPromptTokens(answer);
+  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages));
+  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));
   if (start === -1 || end === -1) {
     addLog.warn('Create question guide error', { answer });
     return {
       result: [],
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }
@@ -81,8 +80,8 @@ export async function createQuestionGuide({
     return {
       result: [],
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }
 }
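
One detail worth noting in the hunks above: because the fallback uses `||` rather than `??`, local counting also kicks in when a provider returns a usage object with zero values, and the early-return branch now reports these measured counts instead of hard-coded zeros. A tiny illustration with made-up numbers:

// Hypothetical values, only to show how the `||` fallback behaves.
const usage = { prompt_tokens: 0, completion_tokens: 42 };
const countLocally = async () => 128; // stand-in for the tiktoken counters

const inputTokens = usage.prompt_tokens || (await countLocally()); // 128: a reported 0 still falls back
const outputTokens = usage.completion_tokens || (await countLocally()); // 42: the provider value wins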

View File

@@ -4,7 +4,7 @@ import { ChatItemType } from '@fastgpt/global/core/chat/type';
 import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
 import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
 import { getLLMModel } from '../model';
-import { llmCompletionsBodyFormat } from '../utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils';
 import { addLog } from '../../../common/system/log';
 import { filterGPTMessageByMaxContext } from '../../chat/utils';
 import json5 from 'json5';
@@ -167,7 +167,7 @@ assistant: ${chatBg}
     }
   ] as any;
-  const { response: result } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         stream: false,
@@ -178,15 +178,17 @@ assistant: ${chatBg}
       modelData
     )
   });
-  let answer = result.choices?.[0]?.message?.content || '';
+  const { text: answer, usage } = await llmResponseToAnswerText(response);
+  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(messages));
+  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));
   if (!answer) {
     return {
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens: inputTokens,
+      outputTokens: outputTokens
     };
   }
@@ -200,8 +202,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens: inputTokens,
+      outputTokens: outputTokens
     };
   }
@@ -218,8 +220,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: (Array.isArray(queries) ? queries : []).slice(0, 5),
       model,
-      inputTokens: await countGptMessagesTokens(messages),
-      outputTokens: await countPromptTokens(answer)
+      inputTokens,
+      outputTokens
     };
   } catch (error) {
     addLog.warn('Query extension failed, not a valid JSON', {
@@ -229,8 +231,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }
 };
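
The query-extension flow now resolves both token counts once, immediately after the completion returns, so every early-return branch (empty answer, invalid JSON) reports the same measured usage. A condensed, self-contained sketch of that branch structure (the real implementation in the diff above carries more fields and prompt handling; the counter callbacks stand in for countGptMessagesTokens / countPromptTokens):

import json5 from 'json5';

// Condensed sketch of the post-commit branch structure, not the actual queryExtension code.
export const buildExtensionResult = async (params: {
  rawQuery: string;
  model: string;
  answer: string;
  usage?: { prompt_tokens?: number; completion_tokens?: number };
  countInput: () => Promise<number>;
  countOutput: () => Promise<number>;
}) => {
  const { rawQuery, model, answer, usage, countInput, countOutput } = params;
  const inputTokens = usage?.prompt_tokens || (await countInput());
  const outputTokens = usage?.completion_tokens || (await countOutput());
  const empty = { rawQuery, extensionQueries: [] as string[], model, inputTokens, outputTokens };

  if (!answer) return empty;
  try {
    const queries = json5.parse(answer);
    return {
      ...empty,
      extensionQueries: (Array.isArray(queries) ? queries : []).slice(0, 5)
    };
  } catch (error) {
    return empty; // invalid JSON still reports the real token usage
  }
};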

View File

@@ -3,9 +3,12 @@ import {
   ChatCompletionCreateParamsNonStreaming,
   ChatCompletionCreateParamsStreaming,
   CompletionFinishReason,
-  StreamChatType
+  StreamChatType,
+  UnStreamChatType,
+  CompletionUsage
 } from '@fastgpt/global/core/ai/type';
 import { getLLMModel } from './model';
+import { getLLMDefaultUsage } from '@fastgpt/global/core/ai/constants';
 /*
   Count response max token
@@ -97,13 +100,42 @@ export const llmCompletionsBodyFormat = <T extends CompletionsBodyType>(
   return requestBody as unknown as InferCompletionsBody<T>;
 };
-export const llmStreamResponseToAnswerText = async (response: StreamChatType) => {
+export const llmStreamResponseToAnswerText = async (
+  response: StreamChatType
+): Promise<{
+  text: string;
+  usage?: CompletionUsage;
+}> => {
   let answer = '';
+  let usage = getLLMDefaultUsage();
   for await (const part of response) {
+    usage = part.usage || usage;
     const content = part.choices?.[0]?.delta?.content || '';
     answer += content;
   }
-  return parseReasoningContent(answer)[1];
+  return {
+    text: parseReasoningContent(answer)[1],
+    usage
+  };
 };
+export const llmUnStreamResponseToAnswerText = async (
+  response: UnStreamChatType
+): Promise<{
+  text: string;
+  usage?: CompletionUsage;
+}> => {
+  const answer = response.choices?.[0]?.message?.content || '';
+  return {
+    text: answer,
+    usage: response.usage
+  };
+};
+export const llmResponseToAnswerText = async (response: StreamChatType | UnStreamChatType) => {
+  if ('iterator' in response) {
+    return llmStreamResponseToAnswerText(response);
+  }
+  return llmUnStreamResponseToAnswerText(response);
+};
 // Parse <think></think> tags to think and answer - unstream response
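
The new llmResponseToAnswerText helper dispatches on whether the response is a stream (detected via the iterator property on the SDK's stream wrapper) and always yields both the answer text and whatever usage the provider reported; OpenAI-compatible providers generally attach usage only to the final stream chunk, which is why the stream variant keeps overwriting a default usage object as chunks arrive. A hedged caller sketch (the wrapper below is illustrative, not from the commit; import path assumed):

import type { StreamChatType, UnStreamChatType } from '@fastgpt/global/core/ai/type';
import { llmResponseToAnswerText } from './utils'; // path assumed for a sibling module

// Illustrative caller: one code path for streamed and non-streamed completions.
export const readAnswer = async (response: StreamChatType | UnStreamChatType) => {
  const { text, usage } = await llmResponseToAnswerText(response);
  // usage can still be undefined or empty; the call sites in this commit fall
  // back to tiktoken counting in that case.
  return {
    answer: text,
    inputTokens: usage?.prompt_tokens ?? 0,
    outputTokens: usage?.completion_tokens ?? 0
  };
};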
@@ -140,7 +172,7 @@ export const parseReasoningStreamContent = () => {
     part: {
       choices: {
         delta: {
-          content?: string;
+          content?: string | null;
           reasoning_content?: string;
         };
         finish_reason?: CompletionFinishReason;
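
The final hunk widens the streamed delta's content type to string | null: OpenAI-style stream chunks can carry content: null (for example on tool-call or role-only deltas), and the parser keeps coalescing it to an empty string. A minimal guard, with the delta shape reduced to the two fields shown above:

// Delta shape reduced to the two fields from the hunk above.
type StreamDelta = { content?: string | null; reasoning_content?: string };

export const appendDelta = (answer: string, delta: StreamDelta): string =>
  answer + (delta.content || ''); // null and undefined both collapse to ''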