Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-23 05:12:39 +00:00)

feat: get tokens from api usage (#4671)
@@ -1,5 +1,5 @@
 import OpenAI from '@fastgpt/global/core/ai';
-import {
+import type {
   ChatCompletionCreateParamsNonStreaming,
   ChatCompletionCreateParamsStreaming,
   StreamChatType,
@@ -2,7 +2,7 @@ import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d'
 import { createChatCompletion } from '../config';
 import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
 import { loadRequestMessages } from '../../chat/utils';
-import { llmCompletionsBodyFormat } from '../utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils';
 import {
   QuestionGuidePrompt,
   QuestionGuideFooterPrompt
@@ -35,7 +35,7 @@ export async function createQuestionGuide({
     useVision: false
   });

-  const { response: data } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         model,
@@ -47,21 +47,20 @@ export async function createQuestionGuide({
       model
     )
   });

-  const answer = data.choices?.[0]?.message?.content || '';
+  const { text: answer, usage } = await llmResponseToAnswerText(response);

   const start = answer.indexOf('[');
   const end = answer.lastIndexOf(']');

-  const inputTokens = await countGptMessagesTokens(requestMessages);
-  const outputTokens = await countPromptTokens(answer);
+  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages));
+  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));

   if (start === -1 || end === -1) {
     addLog.warn('Create question guide error', { answer });
     return {
       result: [],
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }
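
The hunk above prefers the token counts reported by the API and only falls back to local tiktoken counting when the provider omits a usage block. A minimal, self-contained sketch of that fallback pattern follows; resolveTokenUsage and localCount are illustrative stand-ins for the repo's countGptMessagesTokens / countPromptTokens helpers, and the chars-per-token estimate is only a placeholder.

// Illustrative sketch, not the repo code: prefer provider-reported usage and
// fall back to a local estimate only when the API response omits it.
type Usage = { prompt_tokens?: number; completion_tokens?: number };

// Stand-in for the repo's tiktoken helpers (countGptMessagesTokens / countPromptTokens).
const localCount = async (text: string): Promise<number> => Math.ceil(text.length / 4);

const resolveTokenUsage = async (
  usage: Usage | undefined,
  promptText: string,
  answerText: string
) => {
  const inputTokens = usage?.prompt_tokens || (await localCount(promptText));
  const outputTokens = usage?.completion_tokens || (await localCount(answerText));
  return { inputTokens, outputTokens };
};
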
@@ -81,8 +80,8 @@ export async function createQuestionGuide({

     return {
       result: [],
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }
 }
@@ -4,7 +4,7 @@ import { ChatItemType } from '@fastgpt/global/core/chat/type';
 import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
 import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
 import { getLLMModel } from '../model';
-import { llmCompletionsBodyFormat } from '../utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils';
 import { addLog } from '../../../common/system/log';
 import { filterGPTMessageByMaxContext } from '../../chat/utils';
 import json5 from 'json5';
@@ -167,7 +167,7 @@ assistant: ${chatBg}
     }
   ] as any;

-  const { response: result } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         stream: false,
@@ -178,15 +178,17 @@ assistant: ${chatBg}
       modelData
     )
   });
+  const { text: answer, usage } = await llmResponseToAnswerText(response);
+  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(messages));
+  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));

-  let answer = result.choices?.[0]?.message?.content || '';
   if (!answer) {
     return {
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens: inputTokens,
+      outputTokens: outputTokens
     };
   }
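
In the query-extension hunk above, inputTokens and outputTokens are now computed once, right after the completion returns, so the early-return branches (empty answer, invalid JSON) report real usage instead of zeros. A small sketch of that control flow, under the assumption that the result shape is simplified; buildExtensionResult is a hypothetical name and JSON.parse stands in for the repo's json5 parsing.

// Sketch: fix the token counts up front, then reuse them on every return path.
type ExtensionResult = { queries: string[]; inputTokens: number; outputTokens: number };

const buildExtensionResult = (
  answer: string,
  inputTokens: number,
  outputTokens: number
): ExtensionResult => {
  if (!answer) {
    // Empty completion: the tokens were still consumed, so still report them.
    return { queries: [], inputTokens, outputTokens };
  }
  try {
    const queries = JSON.parse(answer) as string[];
    return { queries: queries.slice(0, 5), inputTokens, outputTokens };
  } catch {
    // Invalid JSON from the model: the same accounting still applies.
    return { queries: [], inputTokens, outputTokens };
  }
};
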
@@ -200,8 +202,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens: inputTokens,
+      outputTokens: outputTokens
     };
   }
@@ -218,8 +220,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: (Array.isArray(queries) ? queries : []).slice(0, 5),
       model,
-      inputTokens: await countGptMessagesTokens(messages),
-      outputTokens: await countPromptTokens(answer)
+      inputTokens,
+      outputTokens
     };
   } catch (error) {
     addLog.warn('Query extension failed, not a valid JSON', {
@@ -229,8 +231,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }
 };
@@ -3,9 +3,12 @@ import {
   ChatCompletionCreateParamsNonStreaming,
   ChatCompletionCreateParamsStreaming,
   CompletionFinishReason,
-  StreamChatType
+  StreamChatType,
+  UnStreamChatType,
+  CompletionUsage
 } from '@fastgpt/global/core/ai/type';
 import { getLLMModel } from './model';
+import { getLLMDefaultUsage } from '@fastgpt/global/core/ai/constants';

 /*
   Count response max token
@@ -97,13 +100,42 @@ export const llmCompletionsBodyFormat = <T extends CompletionsBodyType>(
   return requestBody as unknown as InferCompletionsBody<T>;
 };

-export const llmStreamResponseToAnswerText = async (response: StreamChatType) => {
+export const llmStreamResponseToAnswerText = async (
+  response: StreamChatType
+): Promise<{
+  text: string;
+  usage?: CompletionUsage;
+}> => {
   let answer = '';
+  let usage = getLLMDefaultUsage();
   for await (const part of response) {
+    usage = part.usage || usage;
+
     const content = part.choices?.[0]?.delta?.content || '';
     answer += content;
   }
-  return parseReasoningContent(answer)[1];
+  return {
+    text: parseReasoningContent(answer)[1],
+    usage
+  };
 };
+export const llmUnStreamResponseToAnswerText = async (
+  response: UnStreamChatType
+): Promise<{
+  text: string;
+  usage?: CompletionUsage;
+}> => {
+  const answer = response.choices?.[0]?.message?.content || '';
+  return {
+    text: answer,
+    usage: response.usage
+  };
+};
+export const llmResponseToAnswerText = async (response: StreamChatType | UnStreamChatType) => {
+  if ('iterator' in response) {
+    return llmStreamResponseToAnswerText(response);
+  }
+  return llmUnStreamResponseToAnswerText(response);
+};

 // Parse <think></think> tags to think and answer - unstream response
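
A self-contained sketch of how the streaming half of the new helpers works: answer text is accumulated from the delta chunks while the usage block, which many OpenAI-compatible APIs attach only to the final chunk (often only when stream_options: { include_usage: true } is requested), is captured as it arrives. The types, the mock stream, and streamToAnswer below are illustrative, not the repo's StreamChatType or llmStreamResponseToAnswerText.

// Sketch mirroring the stream handling added above: accumulate text, keep the
// latest usage object seen on the stream.
type UsageLike = { prompt_tokens: number; completion_tokens: number; total_tokens: number };
type StreamPart = { choices: { delta: { content?: string | null } }[]; usage?: UsageLike | null };

async function* mockStream(): AsyncGenerator<StreamPart> {
  yield { choices: [{ delta: { content: 'Hel' } }] };
  yield { choices: [{ delta: { content: 'lo' } }] };
  // Final chunk: empty choices, usage attached (provider-dependent behaviour).
  yield { choices: [], usage: { prompt_tokens: 9, completion_tokens: 2, total_tokens: 11 } };
}

const streamToAnswer = async (stream: AsyncIterable<StreamPart>) => {
  let text = '';
  let usage: UsageLike | undefined;
  for await (const part of stream) {
    usage = part.usage || usage; // keep the most recent usage seen
    text += part.choices?.[0]?.delta?.content || '';
  }
  return { text, usage };
};

// Example: streamToAnswer(mockStream()) resolves to
// { text: 'Hello', usage: { prompt_tokens: 9, completion_tokens: 2, total_tokens: 11 } }
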
@@ -140,7 +172,7 @@ export const parseReasoningStreamContent = () => {
     part: {
       choices: {
         delta: {
-          content?: string;
+          content?: string | null;
           reasoning_content?: string;
         };
         finish_reason?: CompletionFinishReason;