Mirror of https://github.com/labring/FastGPT.git, synced 2025-07-22 04:06:18 +00:00

feat: get tokens from api usage (#4671)
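
In short, the change prefers the token counts reported by the model API itself (usage.prompt_tokens / usage.completion_tokens) and falls back to local tiktoken counting only when the API omits them. A minimal sketch of the pattern, with countTokensLocally as an illustrative stand-in for FastGPT's countGptMessagesTokens / countPromptTokens helpers:

type CompletionUsage = { prompt_tokens: number; completion_tokens: number; total_tokens: number };

// Stand-in for FastGPT's tiktoken-based counters; a rough heuristic, for illustration only.
const countTokensLocally = async (text: string): Promise<number> => Math.ceil(text.length / 4);

// Prefer the API-reported usage; fall back to local counting only when it is missing.
export const resolveTokenCounts = async (
  usage: CompletionUsage | undefined,
  promptText: string,
  answerText: string
) => ({
  inputTokens: usage?.prompt_tokens || (await countTokensLocally(promptText)),
  outputTokens: usage?.completion_tokens || (await countTokensLocally(answerText))
});

The diffs below apply this pattern to the question-guide, query-extension, content-extract, tool-call and chat-completion nodes, threading the usage object out of both streaming and non-streaming responses.
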
@@ -1,5 +1,5 @@
 import OpenAI from '@fastgpt/global/core/ai';
-import {
+import type {
   ChatCompletionCreateParamsNonStreaming,
   ChatCompletionCreateParamsStreaming,
   StreamChatType,

@@ -2,7 +2,7 @@ import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d'
 import { createChatCompletion } from '../config';
 import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
 import { loadRequestMessages } from '../../chat/utils';
-import { llmCompletionsBodyFormat } from '../utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils';
 import {
   QuestionGuidePrompt,
   QuestionGuideFooterPrompt
@@ -35,7 +35,7 @@ export async function createQuestionGuide({
     useVision: false
   });

-  const { response: data } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         model,
@@ -47,21 +47,20 @@ export async function createQuestionGuide({
       model
     )
   });
-
-  const answer = data.choices?.[0]?.message?.content || '';
+  const { text: answer, usage } = await llmResponseToAnswerText(response);

   const start = answer.indexOf('[');
   const end = answer.lastIndexOf(']');

-  const inputTokens = await countGptMessagesTokens(requestMessages);
-  const outputTokens = await countPromptTokens(answer);
+  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages));
+  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));

   if (start === -1 || end === -1) {
     addLog.warn('Create question guide error', { answer });
     return {
       result: [],
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }

@@ -81,8 +80,8 @@ export async function createQuestionGuide({

     return {
       result: [],
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }
 }

@@ -4,7 +4,7 @@ import { ChatItemType } from '@fastgpt/global/core/chat/type';
 import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
 import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
 import { getLLMModel } from '../model';
-import { llmCompletionsBodyFormat } from '../utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils';
 import { addLog } from '../../../common/system/log';
 import { filterGPTMessageByMaxContext } from '../../chat/utils';
 import json5 from 'json5';
@@ -167,7 +167,7 @@ assistant: ${chatBg}
     }
   ] as any;

-  const { response: result } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         stream: false,
@@ -178,15 +178,17 @@ assistant: ${chatBg}
       modelData
     )
   });
-
-  let answer = result.choices?.[0]?.message?.content || '';
+  const { text: answer, usage } = await llmResponseToAnswerText(response);
+  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(messages));
+  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));
+
   if (!answer) {
     return {
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens: inputTokens,
+      outputTokens: outputTokens
     };
   }

@@ -200,8 +202,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens: inputTokens,
+      outputTokens: outputTokens
     };
   }

@@ -218,8 +220,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: (Array.isArray(queries) ? queries : []).slice(0, 5),
       model,
-      inputTokens: await countGptMessagesTokens(messages),
-      outputTokens: await countPromptTokens(answer)
+      inputTokens,
+      outputTokens
     };
   } catch (error) {
     addLog.warn('Query extension failed, not a valid JSON', {
@@ -229,8 +231,8 @@ assistant: ${chatBg}
       rawQuery: query,
       extensionQueries: [],
       model,
-      inputTokens: 0,
-      outputTokens: 0
+      inputTokens,
+      outputTokens
     };
   }
 };

@@ -3,9 +3,12 @@ import {
   ChatCompletionCreateParamsNonStreaming,
   ChatCompletionCreateParamsStreaming,
   CompletionFinishReason,
-  StreamChatType
+  StreamChatType,
+  UnStreamChatType,
+  CompletionUsage
 } from '@fastgpt/global/core/ai/type';
 import { getLLMModel } from './model';
+import { getLLMDefaultUsage } from '@fastgpt/global/core/ai/constants';

 /*
   Count response max token
@@ -97,13 +100,42 @@ export const llmCompletionsBodyFormat = <T extends CompletionsBodyType>(
   return requestBody as unknown as InferCompletionsBody<T>;
 };

-export const llmStreamResponseToAnswerText = async (response: StreamChatType) => {
+export const llmStreamResponseToAnswerText = async (
+  response: StreamChatType
+): Promise<{
+  text: string;
+  usage?: CompletionUsage;
+}> => {
   let answer = '';
+  let usage = getLLMDefaultUsage();
   for await (const part of response) {
+    usage = part.usage || usage;
+
     const content = part.choices?.[0]?.delta?.content || '';
     answer += content;
   }
-  return parseReasoningContent(answer)[1];
+  return {
+    text: parseReasoningContent(answer)[1],
+    usage
+  };
 };
+export const llmUnStreamResponseToAnswerText = async (
+  response: UnStreamChatType
+): Promise<{
+  text: string;
+  usage?: CompletionUsage;
+}> => {
+  const answer = response.choices?.[0]?.message?.content || '';
+  return {
+    text: answer,
+    usage: response.usage
+  };
+};
+export const llmResponseToAnswerText = async (response: StreamChatType | UnStreamChatType) => {
+  if ('iterator' in response) {
+    return llmStreamResponseToAnswerText(response);
+  }
+  return llmUnStreamResponseToAnswerText(response);
+};

 // Parse <think></think> tags to think and answer - unstream response
@@ -140,7 +172,7 @@ export const parseReasoningStreamContent = () => {
     part: {
       choices: {
         delta: {
-          content?: string;
+          content?: string | null;
           reasoning_content?: string;
         };
         finish_reason?: CompletionFinishReason;

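For reference, a self-contained sketch of the dispatch the new llmResponseToAnswerText helper performs. The types are simplified stand-ins for FastGPT's StreamChatType / UnStreamChatType; the real helper detects the OpenAI SDK stream via the 'iterator' field shown above, whereas this sketch checks for an async iterator:

type Usage = { prompt_tokens: number; completion_tokens: number; total_tokens: number };
type StreamChunk = { choices: { delta: { content?: string | null } }[]; usage?: Usage };
type UnStreamChat = { choices: { message: { content?: string | null } }[]; usage?: Usage };

async function responseToAnswerText(
  response: AsyncIterable<StreamChunk> | UnStreamChat
): Promise<{ text: string; usage?: Usage }> {
  if (Symbol.asyncIterator in response) {
    let text = '';
    let usage: Usage | undefined;
    for await (const part of response as AsyncIterable<StreamChunk>) {
      usage = part.usage || usage; // providers usually attach usage to the last chunk, if at all
      text += part.choices?.[0]?.delta?.content || '';
    }
    return { text, usage };
  }
  const data = response as UnStreamChat;
  return { text: data.choices?.[0]?.message?.content || '', usage: data.usage };
}
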
@@ -19,7 +19,7 @@ import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/ty
 import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
 import { getHandleId } from '@fastgpt/global/core/workflow/utils';
 import { loadRequestMessages } from '../../../chat/utils';
-import { llmCompletionsBodyFormat } from '../../../ai/utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../../../ai/utils';
 import { addLog } from '../../../../common/system/log';
 import { ModelTypeEnum } from '../../../../../global/core/ai/model';
 import { replaceVariable } from '@fastgpt/global/common/string/tools';
@@ -129,7 +129,7 @@ const completions = async ({
     useVision: false
   });

-  const { response: data } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         model: cqModel.model,
@@ -141,7 +141,7 @@ const completions = async ({
       ),
     userKey: externalProvider.openaiAccount
   });
-  const answer = data.choices?.[0].message?.content || '';
+  const { text: answer, usage } = await llmResponseToAnswerText(response);

   // console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2));
   // console.log(answer, '----');
@@ -156,8 +156,8 @@ const completions = async ({
   }

   return {
-    inputTokens: await countGptMessagesTokens(requestMessages),
-    outputTokens: await countPromptTokens(answer),
+    inputTokens: usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages)),
+    outputTokens: usage?.completion_tokens || (await countPromptTokens(answer)),
     arg: { type: id }
   };
 };

@@ -23,14 +23,14 @@ import { getLLMModel } from '../../../ai/model';
 import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
 import json5 from 'json5';
 import {
   ChatCompletionCreateParams,
   ChatCompletionMessageParam,
-  ChatCompletionTool
+  ChatCompletionTool,
+  UnStreamChatType
 } from '@fastgpt/global/core/ai/type';
 import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
 import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
 import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
-import { llmCompletionsBodyFormat } from '../../../ai/utils';
+import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../../../ai/utils';
 import { ModelTypeEnum } from '../../../../../global/core/ai/model';
 import {
   getExtractJsonPrompt,
@@ -76,13 +76,6 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
       extractModel
     });
   }
-  if (extractModel.functionCall) {
-    return functionCall({
-      ...props,
-      histories: chatHistories,
-      extractModel
-    });
-  }
   return completions({
     ...props,
     histories: chatHistories,
@@ -233,9 +226,10 @@ const toolChoice = async (props: ActionProps) => {
     }
   ];

-  const { response } = await createChatCompletion({
+  const { response } = (await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
+        stream: false,
         model: extractModel.model,
         temperature: 0.01,
         messages: filterMessages,
@@ -245,7 +239,7 @@ const toolChoice = async (props: ActionProps) => {
       extractModel
     ),
     userKey: externalProvider.openaiAccount
-  });
+  })) as { response: UnStreamChatType };

   const arg: Record<string, any> = (() => {
     try {
@@ -267,8 +261,9 @@ const toolChoice = async (props: ActionProps) => {
     }
   ];

-  const inputTokens = await countGptMessagesTokens(filterMessages, tools);
-  const outputTokens = await countGptMessagesTokens(AIMessages);
+  const usage = response.usage;
+  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(filterMessages, tools));
+  const outputTokens = usage?.completion_tokens || (await countGptMessagesTokens(AIMessages));
   return {
     inputTokens,
     outputTokens,
@@ -276,59 +271,6 @@ const toolChoice = async (props: ActionProps) => {
   };
 };

-const functionCall = async (props: ActionProps) => {
-  const { externalProvider, extractModel } = props;
-
-  const { agentFunction, filterMessages } = await getFunctionCallSchema(props);
-  const functions: ChatCompletionCreateParams.Function[] = [agentFunction];
-
-  const { response } = await createChatCompletion({
-    body: llmCompletionsBodyFormat(
-      {
-        model: extractModel.model,
-        temperature: 0.01,
-        messages: filterMessages,
-        function_call: {
-          name: agentFunName
-        },
-        functions
-      },
-      extractModel
-    ),
-    userKey: externalProvider.openaiAccount
-  });
-
-  try {
-    const arg = JSON.parse(response?.choices?.[0]?.message?.function_call?.arguments || '');
-
-    const AIMessages: ChatCompletionMessageParam[] = [
-      {
-        role: ChatCompletionRequestMessageRoleEnum.Assistant,
-        function_call: response.choices?.[0]?.message?.function_call
-      }
-    ];
-
-    const inputTokens = await countGptMessagesTokens(filterMessages, undefined, functions);
-    const outputTokens = await countGptMessagesTokens(AIMessages);
-
-    return {
-      arg,
-      inputTokens,
-      outputTokens
-    };
-  } catch (error) {
-    console.log(response.choices?.[0]?.message);
-
-    console.log('Your model may not support toll_call', error);
-
-    return {
-      arg: {},
-      inputTokens: 0,
-      outputTokens: 0
-    };
-  }
-};
-
 const completions = async ({
   extractModel,
   externalProvider,
@@ -373,7 +315,7 @@ Human: ${content}`
     useVision: false
   });

-  const { response: data } = await createChatCompletion({
+  const { response } = await createChatCompletion({
     body: llmCompletionsBodyFormat(
       {
         model: extractModel.model,
@@ -385,7 +327,9 @@ Human: ${content}`
     ),
     userKey: externalProvider.openaiAccount
   });
-  const answer = data.choices?.[0].message?.content || '';
+  const { text: answer, usage } = await llmResponseToAnswerText(response);
+  const inputTokens = usage?.prompt_tokens || (await countMessagesTokens(messages));
+  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));

   // parse response
   const jsonStr = sliceJsonStr(answer);
@@ -393,8 +337,8 @@ Human: ${content}`
   if (!jsonStr) {
     return {
       rawResponse: answer,
-      inputTokens: await countMessagesTokens(messages),
-      outputTokens: await countPromptTokens(answer),
+      inputTokens,
+      outputTokens,
       arg: {}
     };
   }
@@ -402,8 +346,8 @@ Human: ${content}`
   try {
     return {
       rawResponse: answer,
-      inputTokens: await countMessagesTokens(messages),
-      outputTokens: await countPromptTokens(answer),
+      inputTokens,
+      outputTokens,
       arg: json5.parse(jsonStr) as Record<string, any>
     };
   } catch (error) {
@@ -411,8 +355,8 @@ Human: ${content}`
     console.log(error);
     return {
       rawResponse: answer,
-      inputTokens: await countMessagesTokens(messages),
-      outputTokens: await countPromptTokens(answer),
+      inputTokens,
+      outputTokens,
       arg: {}
     };
   }

@@ -13,7 +13,10 @@ import { NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
 import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
 import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
-import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
+import {
+  ChatCompletionRequestMessageRoleEnum,
+  getLLMDefaultUsage
+} from '@fastgpt/global/core/ai/constants';
 import { dispatchWorkFlow } from '../../index';
 import { DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d';
 import json5 from 'json5';
@@ -244,17 +247,34 @@ export const runToolWithFunctionCall = async (
     }
   });

-  const { answer, functionCalls } = await (async () => {
-    if (res && isStreamResponse) {
-      return streamResponse({
+  let { answer, functionCalls, inputTokens, outputTokens } = await (async () => {
+    if (isStreamResponse) {
+      if (!res || res.closed) {
+        return {
+          answer: '',
+          functionCalls: [],
+          inputTokens: 0,
+          outputTokens: 0
+        };
+      }
+      const result = await streamResponse({
         res,
         toolNodes,
         stream: aiResponse,
         workflowStreamResponse
       });
+
+      return {
+        answer: result.answer,
+        functionCalls: result.functionCalls,
+        inputTokens: result.usage.prompt_tokens,
+        outputTokens: result.usage.completion_tokens
+      };
     } else {
       const result = aiResponse as ChatCompletion;
       const function_call = result.choices?.[0]?.message?.function_call;
+      const usage = result.usage;

       const toolNode = toolNodes.find((node) => node.nodeId === function_call?.name);
@@ -270,7 +290,9 @@ export const runToolWithFunctionCall = async (

       return {
         answer: result.choices?.[0]?.message?.content || '',
-        functionCalls: toolCalls
+        functionCalls: toolCalls,
+        inputTokens: usage?.prompt_tokens,
+        outputTokens: usage?.completion_tokens
       };
     }
   })();
@@ -338,7 +360,7 @@ export const runToolWithFunctionCall = async (
       : flatToolsResponseData;

   const functionCall = functionCalls[0];
-  if (functionCall && !res?.closed) {
+  if (functionCall) {
     // Run the tool, combine its results, and perform another round of AI calls
     const assistantToolMsgParams: ChatCompletionAssistantMessageParam = {
       role: ChatCompletionRequestMessageRoleEnum.Assistant,
@@ -356,8 +378,9 @@ export const runToolWithFunctionCall = async (
     ] as ChatCompletionMessageParam[];
     // Only toolCall tokens are counted here, Tool response tokens count towards the next reply
     // const tokens = await countGptMessagesTokens(concatToolMessages, undefined, functions);
-    const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions);
-    const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]);
+    inputTokens =
+      inputTokens || (await countGptMessagesTokens(requestMessages, undefined, functions));
+    outputTokens = outputTokens || (await countGptMessagesTokens([assistantToolMsgParams]));
     /*
       ...
       user
@@ -459,8 +482,9 @@ export const runToolWithFunctionCall = async (
       content: answer
     };
     const completeMessages = filterMessages.concat(gptAssistantResponse);
-    const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions);
-    const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
+    inputTokens =
+      inputTokens || (await countGptMessagesTokens(requestMessages, undefined, functions));
+    outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse]));
     // console.log(tokens, 'response token');

     // concat tool assistant
@@ -500,8 +524,10 @@ async function streamResponse({
   let textAnswer = '';
   let functionCalls: ChatCompletionMessageFunctionCall[] = [];
   let functionId = getNanoid();
+  let usage = getLLMDefaultUsage();

   for await (const part of stream) {
+    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
       break;
@@ -522,7 +548,7 @@ async function streamResponse({
         });
       } else if (responseChoice.function_call) {
         const functionCall: {
-          arguments: string;
+          arguments?: string;
           name?: string;
         } = responseChoice.function_call;

@@ -532,11 +558,9 @@ async function streamResponse({
       const toolNode = toolNodes.find((item) => item.nodeId === functionCall?.name);

       if (toolNode) {
-        if (functionCall?.arguments === undefined) {
-          functionCall.arguments = '';
-        }
         functionCalls.push({
           ...functionCall,
+          arguments: functionCall.arguments || '',
           id: functionId,
           name: functionCall.name,
           toolName: toolNode.name,
@@ -552,7 +576,7 @@ async function streamResponse({
           toolName: toolNode.name,
           toolAvatar: toolNode.avatar,
           functionName: functionCall.name,
-          params: functionCall.arguments,
+          params: functionCall.arguments || '',
           response: ''
         }
       }
@@ -585,5 +609,5 @@ async function streamResponse({
     }
   }

-  return { answer: textAnswer, functionCalls };
+  return { answer: textAnswer, functionCalls, usage };
 }

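A note on the streaming paths above: providers only attach usage to the final stream chunk, and for OpenAI-style APIs typically only when the request asks for it (stream_options: { include_usage: true }), so the accumulated counters can legitimately stay at zero. That is presumably why these tool-call nodes switch to mutable inputTokens/outputTokens bindings and re-resolve them with a local fallback before billing. A small sketch of that fallback, assuming getLLMDefaultUsage() initialises the counters to zero; countMessagesLocally stands in for FastGPT's tiktoken helper:

type Message = { role: string; content: string };

// Heuristic stand-in for FastGPT's countGptMessagesTokens (tiktoken based in the real code).
const countMessagesLocally = async (messages: Message[]): Promise<number> =>
  messages.reduce((sum, m) => sum + Math.ceil(m.content.length / 4), 0);

async function resolveToolCallTokens(
  inputTokens: number,
  outputTokens: number,
  requestMessages: Message[],
  assistantMessages: Message[]
) {
  // The default usage starts the counters at 0 (falsy), so the local fallback
  // only runs when the API reported no usage for this call.
  inputTokens = inputTokens || (await countMessagesLocally(requestMessages));
  outputTokens = outputTokens || (await countMessagesLocally(assistantMessages));
  return { inputTokens, outputTokens };
}
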
@@ -171,7 +171,6 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
   const {
     toolWorkflowInteractiveResponse,
     dispatchFlowResponse, // tool flow response
-    toolNodeTokens,
     toolNodeInputTokens,
     toolNodeOutputTokens,
     completeMessages = [], // The actual message sent to AI(just save text)
@@ -271,7 +270,6 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
     [DispatchNodeResponseKeyEnum.nodeResponse]: {
       // Points consumed, for display
       totalPoints: totalPointsUsage,
-      toolCallTokens: toolNodeTokens,
       toolCallInputTokens: toolNodeInputTokens,
       toolCallOutputTokens: toolNodeOutputTokens,
       childTotalPoints: flatUsages.reduce((sum, item) => sum + item.totalPoints, 0),

@@ -9,7 +9,10 @@ import { NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
 import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
 import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
-import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
+import {
+  ChatCompletionRequestMessageRoleEnum,
+  getLLMDefaultUsage
+} from '@fastgpt/global/core/ai/constants';
 import { dispatchWorkFlow } from '../../index';
 import { DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d';
 import json5 from 'json5';
@@ -256,9 +259,18 @@ export const runToolWithPromptCall = async (
     }
   });

-  const { answer, reasoning, finish_reason } = await (async () => {
-    if (res && isStreamResponse) {
-      const { answer, reasoning, finish_reason } = await streamResponse({
+  let { answer, reasoning, finish_reason, inputTokens, outputTokens } = await (async () => {
+    if (isStreamResponse) {
+      if (!res || res.closed) {
+        return {
+          answer: '',
+          reasoning: '',
+          finish_reason: 'close' as const,
+          inputTokens: 0,
+          outputTokens: 0
+        };
+      }
+      const { answer, reasoning, finish_reason, usage } = await streamResponse({
         res,
         toolNodes,
         stream: aiResponse,
@@ -266,18 +278,28 @@ export const runToolWithPromptCall = async (
         aiChatReasoning
       });

-      return { answer, reasoning, finish_reason };
+      return {
+        answer,
+        reasoning,
+        finish_reason,
+        inputTokens: usage.prompt_tokens,
+        outputTokens: usage.completion_tokens
+      };
     } else {
       const finish_reason = aiResponse.choices?.[0]?.finish_reason as CompletionFinishReason;
       const content = aiResponse.choices?.[0]?.message?.content || '';
       // @ts-ignore
       const reasoningContent: string = aiResponse.choices?.[0]?.message?.reasoning_content || '';
+      const usage = aiResponse.usage;

       // API already parse reasoning content
       if (reasoningContent || !aiChatReasoning) {
         return {
           answer: content,
           reasoning: reasoningContent,
-          finish_reason
+          finish_reason,
+          inputTokens: usage?.prompt_tokens,
+          outputTokens: usage?.completion_tokens
         };
       }

@@ -285,7 +307,9 @@ export const runToolWithPromptCall = async (
       return {
         answer,
         reasoning: think,
-        finish_reason
+        finish_reason,
+        inputTokens: usage?.prompt_tokens,
+        outputTokens: usage?.completion_tokens
       };
     }
   })();
@@ -336,8 +360,8 @@ export const runToolWithPromptCall = async (
       reasoning_text: undefined
     });

-    const inputTokens = await countGptMessagesTokens(requestMessages);
-    const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
+    inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages));
+    outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse]));

     // concat tool assistant
     const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType;
@@ -423,8 +447,8 @@ export const runToolWithPromptCall = async (
     };

     // Only toolCall tokens are counted here, Tool response tokens count towards the next reply
-    const inputTokens = await countGptMessagesTokens(requestMessages);
-    const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]);
+    inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages));
+    outputTokens = outputTokens || (await countGptMessagesTokens([assistantToolMsgParams]));

     /*
       ...
@@ -559,9 +583,12 @@ async function streamResponse({
   let answer = '';
   let reasoning = '';
   let finish_reason: CompletionFinishReason = null;
+  let usage = getLLMDefaultUsage();

   const { parsePart, getStartTagBuffer } = parseReasoningStreamContent();

   for await (const part of stream) {
+    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
       finish_reason = 'close';
@@ -629,7 +656,7 @@ async function streamResponse({
     }
   }

-  return { answer, reasoning, finish_reason };
+  return { answer, reasoning, finish_reason, usage };
 }

 const parseAnswer = (

@@ -14,7 +14,10 @@ import { NextApiResponse } from 'next';
 import { responseWriteController } from '../../../../../common/response';
 import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
 import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
-import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
+import {
+  ChatCompletionRequestMessageRoleEnum,
+  getLLMDefaultUsage
+} from '@fastgpt/global/core/ai/constants';
 import { dispatchWorkFlow } from '../../index';
 import { DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d';
 import json5 from 'json5';
@@ -301,19 +304,38 @@ export const runToolWithToolChoice = async (
     }
   });

-  const { answer, toolCalls, finish_reason } = await (async () => {
-    if (res && isStreamResponse) {
-      return streamResponse({
+  let { answer, toolCalls, finish_reason, inputTokens, outputTokens } = await (async () => {
+    if (isStreamResponse) {
+      if (!res || res.closed) {
+        return {
+          answer: '',
+          toolCalls: [],
+          finish_reason: 'close' as const,
+          inputTokens: 0,
+          outputTokens: 0
+        };
+      }
+
+      const result = await streamResponse({
         res,
         workflowStreamResponse,
         toolNodes,
         stream: aiResponse
       });
+
+      return {
+        answer: result.answer,
+        toolCalls: result.toolCalls,
+        finish_reason: result.finish_reason,
+        inputTokens: result.usage.prompt_tokens,
+        outputTokens: result.usage.completion_tokens
+      };
     } else {
       const result = aiResponse as ChatCompletion;
       const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
       const calls = result.choices?.[0]?.message?.tool_calls || [];
       const answer = result.choices?.[0]?.message?.content || '';
+      const usage = result.usage;

       // Attach name and avatar
       const toolCalls = calls.map((tool) => {
@@ -353,7 +375,9 @@ export const runToolWithToolChoice = async (
       return {
         answer,
         toolCalls: toolCalls,
-        finish_reason
+        finish_reason,
+        inputTokens: usage?.prompt_tokens,
+        outputTokens: usage?.completion_tokens
       };
     }
   })();
@@ -447,7 +471,7 @@ export const runToolWithToolChoice = async (
       ? response.dispatchFlowResponse.concat(flatToolsResponseData)
       : flatToolsResponseData;

-  if (toolCalls.length > 0 && !res?.closed) {
+  if (toolCalls.length > 0) {
     // Run the tool, combine its results, and perform another round of AI calls
     const assistantToolMsgParams: ChatCompletionAssistantMessageParam[] = [
       ...(answer
@@ -475,8 +499,8 @@ export const runToolWithToolChoice = async (
     ] as ChatCompletionMessageParam[];

     // Only toolCall tokens are counted here, Tool response tokens count towards the next reply
-    const inputTokens = await countGptMessagesTokens(requestMessages, tools);
-    const outputTokens = await countGptMessagesTokens(assistantToolMsgParams);
+    inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools));
+    outputTokens = outputTokens || (await countGptMessagesTokens(assistantToolMsgParams));

     /*
       ...
@@ -580,8 +604,8 @@ export const runToolWithToolChoice = async (
       content: answer
     };
     const completeMessages = filterMessages.concat(gptAssistantResponse);
-    const inputTokens = await countGptMessagesTokens(requestMessages, tools);
-    const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
+    inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools));
+    outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse]));

     // concat tool assistant
     const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType;
@@ -619,8 +643,10 @@ async function streamResponse({
   let callingTool: { name: string; arguments: string } | null = null;
   let toolCalls: ChatCompletionMessageToolCall[] = [];
   let finishReason: CompletionFinishReason = null;
+  let usage = getLLMDefaultUsage();

   for await (const part of stream) {
+    usage = part.usage || usage;
     if (res.closed) {
       stream.controller?.abort();
       finishReason = 'close';
@@ -644,6 +670,7 @@ async function streamResponse({
         });
       }
       if (responseChoice?.tool_calls?.[0]) {
+        // @ts-ignore
         const toolCall: ChatCompletionMessageToolCall = responseChoice.tool_calls[0];
         // In a stream response, only one tool is returned at a time. If have id, description is executing a tool
         if (toolCall.id || callingTool) {
@@ -715,5 +742,5 @@ async function streamResponse({
     }
   }

-  return { answer: textAnswer, toolCalls, finish_reason: finishReason };
+  return { answer: textAnswer, toolCalls, finish_reason: finishReason, usage };
 }

@@ -36,7 +36,6 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{

 export type RunToolResponse = {
   dispatchFlowResponse: DispatchFlowResponse[];
-  toolNodeTokens?: number; // deprecated
   toolNodeInputTokens: number;
   toolNodeOutputTokens: number;
   completeMessages?: ChatCompletionMessageParam[];

@@ -9,11 +9,15 @@ import { createChatCompletion } from '../../../ai/config';
 import type {
   ChatCompletionMessageParam,
   CompletionFinishReason,
+  CompletionUsage,
   StreamChatType
 } from '@fastgpt/global/core/ai/type.d';
 import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
 import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
-import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
+import {
+  ChatCompletionRequestMessageRoleEnum,
+  getLLMDefaultUsage
+} from '@fastgpt/global/core/ai/constants';
 import type {
   ChatDispatchProps,
   DispatchNodeResultType
@@ -199,17 +203,19 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
     }
   });

-  const { answerText, reasoningText, finish_reason } = await (async () => {
+  let { answerText, reasoningText, finish_reason, inputTokens, outputTokens } = await (async () => {
     if (isStreamResponse) {
-      if (!res) {
+      if (!res || res.closed) {
         return {
           answerText: '',
           reasoningText: '',
-          finish_reason: 'close' as const
+          finish_reason: 'close' as const,
+          inputTokens: 0,
+          outputTokens: 0
         };
       }
       // sse response
-      const { answer, reasoning, finish_reason } = await streamResponse({
+      const { answer, reasoning, finish_reason, usage } = await streamResponse({
         res,
         stream: response,
         aiChatReasoning,
@@ -221,10 +227,13 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
       return {
         answerText: answer,
         reasoningText: reasoning,
-        finish_reason
+        finish_reason,
+        inputTokens: usage?.prompt_tokens,
+        outputTokens: usage?.completion_tokens
       };
     } else {
       const finish_reason = response.choices?.[0]?.finish_reason as CompletionFinishReason;
+      const usage = response.usage;

       const { content, reasoningContent } = (() => {
         const content = response.choices?.[0]?.message?.content || '';
@@ -269,7 +278,9 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
       return {
         answerText: content,
         reasoningText: reasoningContent,
-        finish_reason
+        finish_reason,
+        inputTokens: usage?.prompt_tokens,
+        outputTokens: usage?.completion_tokens
       };
     }
   })();
@@ -289,8 +300,8 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
   const completeMessages = [...requestMessages, ...AIMessages];
   const chatCompleteMessages = GPTMessages2Chats(completeMessages);

-  const inputTokens = await countGptMessagesTokens(requestMessages);
-  const outputTokens = await countGptMessagesTokens(AIMessages);
+  inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages));
+  outputTokens = outputTokens || (await countGptMessagesTokens(AIMessages));

   const { totalPoints, modelName } = formatModelChars2Points({
     model,
@@ -305,7 +316,6 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
     [DispatchNodeResponseKeyEnum.nodeResponse]: {
       totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
       model: modelName,
-      tokens: inputTokens + outputTokens,
       inputTokens: inputTokens,
       outputTokens: outputTokens,
       query: `${userChatInput}`,
@@ -565,9 +575,13 @@ async function streamResponse({
   let answer = '';
   let reasoning = '';
   let finish_reason: CompletionFinishReason = null;
+  let usage: CompletionUsage = getLLMDefaultUsage();

   const { parsePart, getStartTagBuffer } = parseReasoningStreamContent();

   for await (const part of stream) {
+    usage = part.usage || usage;
+
     if (res.closed) {
       stream.controller?.abort();
       finish_reason = 'close';
@@ -614,5 +628,5 @@ async function streamResponse({
     }
   }

-  return { answer, reasoning, finish_reason };
+  return { answer, reasoning, finish_reason, usage };
 }