Mirror of https://github.com/labring/FastGPT.git, synced 2025-07-22 20:37:48 +00:00
V4.8.17 feature (#3493)
* split tokens into input and output (#3477)
  * split tokens into input and output
  * query extension & tool call & question guide
  * fix
  * perf: input and output tokens
  * perf: tool call if else
  * perf: remove code
  * fix: extract usage count
  * fix: qa usage count
---------
Co-authored-by: heheer <heheer@sealos.io>
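At a glance: every LLM call that previously reported a single aggregate `tokens` figure now reports `inputTokens` (counted over the messages sent to the model) and `outputTokens` (counted over the model's reply), and points/usage formatting consumes the pair. Below is a minimal TypeScript sketch of that shape change only; the whitespace counter is a crude stand-in for the tiktoken-based helpers (`countGptMessagesTokens` / `countPromptTokens`) that the diff actually uses, and the function names here are illustrative, not FastGPT APIs.

```ts
// Illustrative sketch of the accounting change, not the FastGPT implementation.
type ChatMessage = { role: 'system' | 'user' | 'assistant'; content: string };

type UsageBefore = { tokens: number };
type UsageAfter = { inputTokens: number; outputTokens: number };

// Rough stand-in tokenizer (assumption: ~1 token per whitespace-separated chunk).
const roughCount = (text: string): number => text.split(/\s+/).filter(Boolean).length;

const countMessagesTokens = (messages: ChatMessage[]): number =>
  messages.reduce((sum, m) => sum + roughCount(m.content), 0);

// Before: one aggregate number covering request plus reply.
function usageBefore(requestMessages: ChatMessage[], answerText: string): UsageBefore {
  return { tokens: countMessagesTokens(requestMessages) + roughCount(answerText) };
}

// After: the request (prompt) and the reply (completion) are counted separately.
function usageAfter(requestMessages: ChatMessage[], answerText: string): UsageAfter {
  return {
    inputTokens: countMessagesTokens(requestMessages),
    outputTokens: roughCount(answerText)
  };
}

// Example
const messages: ChatMessage[] = [{ role: 'user', content: 'Give me three follow-up questions' }];
console.log(usageBefore(messages, 'Q1 Q2 Q3'), usageAfter(messages, 'Q1 Q2 Q3'));
```

The diff below applies this split across the question guide, query extension, classify, extract, tool-call, and chat-completion nodes, keeping `toolNodeTokens` only as a deprecated field.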
@@ -1,6 +1,6 @@
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d';
import { createChatCompletion } from '../config';
-import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
+import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { loadRequestMessages } from '../../chat/utils';
import { llmCompletionsBodyFormat } from '../utils';
import {
@@ -20,7 +20,8 @@ export async function createQuestionGuide({
customPrompt?: string;
}): Promise<{
result: string[];
-tokens: number;
+inputTokens: number;
+outputTokens: number;
}> {
const concatMessages: ChatCompletionMessageParam[] = [
...messages,
@@ -29,6 +30,10 @@ export async function createQuestionGuide({
content: `${customPrompt || PROMPT_QUESTION_GUIDE}\n${PROMPT_QUESTION_GUIDE_FOOTER}`
}
];
+const requestMessages = await loadRequestMessages({
+messages: concatMessages,
+useVision: false
+});

const { response: data } = await createChatCompletion({
body: llmCompletionsBodyFormat(
@@ -36,10 +41,7 @@ export async function createQuestionGuide({
model,
temperature: 0.1,
max_tokens: 200,
-messages: await loadRequestMessages({
-messages: concatMessages,
-useVision: false
-}),
+messages: requestMessages,
stream: false
},
model
@@ -51,13 +53,15 @@ export async function createQuestionGuide({
const start = answer.indexOf('[');
const end = answer.lastIndexOf(']');

-const tokens = await countGptMessagesTokens(concatMessages);
+const inputTokens = await countGptMessagesTokens(requestMessages);
+const outputTokens = await countPromptTokens(answer);

if (start === -1 || end === -1) {
addLog.warn('Create question guide error', { answer });
return {
result: [],
-tokens: 0
+inputTokens: 0,
+outputTokens: 0
};
}
@@ -69,14 +73,16 @@ export async function createQuestionGuide({
try {
return {
result: json5.parse(jsonStr),
-tokens
+inputTokens,
+outputTokens
};
} catch (error) {
console.log(error);

return {
result: [],
-tokens: 0
+inputTokens: 0,
+outputTokens: 0
};
}
}

@@ -1,7 +1,7 @@
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { createChatCompletion } from '../config';
import { ChatItemType } from '@fastgpt/global/core/chat/type';
-import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
+import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
import { getLLMModel } from '../model';
import { llmCompletionsBodyFormat } from '../utils';
@@ -121,7 +121,8 @@ export const queryExtension = async ({
rawQuery: string;
extensionQueries: string[];
model: string;
-tokens: number;
+inputTokens: number;
+outputTokens: number;
}> => {
const systemFewShot = chatBg
? `Q: 对话背景。
@@ -166,7 +167,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: [],
model,
-tokens: 0
+inputTokens: 0,
+outputTokens: 0
};
}

@@ -181,7 +183,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: Array.isArray(queries) ? queries : [],
model,
-tokens: await countGptMessagesTokens(messages)
+inputTokens: await countGptMessagesTokens(messages),
+outputTokens: await countPromptTokens(answer)
};
} catch (error) {
addLog.error(`Query extension error`, error);
@@ -189,7 +192,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: [],
model,
-tokens: 0
+inputTokens: 0,
+outputTokens: 0
};
}
};

@@ -4,6 +4,7 @@ export const getLLMModel = (model?: string) => {
global.llmModels[0]
);
};

export const getDatasetModel = (model?: string) => {
return (
global.llmModels

@@ -1,5 +1,8 @@
import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
-import { countMessagesTokens } from '../../../../common/string/tiktoken/index';
+import {
+countGptMessagesTokens,
+countPromptTokens
+} from '../../../../common/string/tiktoken/index';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatItemValueTypeEnum, ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { createChatCompletion } from '../../../ai/config';
@@ -49,7 +52,7 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse

const chatHistories = getHistories(history, histories);

-const { arg, tokens } = await completions({
+const { arg, inputTokens, outputTokens } = await completions({
...props,
histories: chatHistories,
cqModel
@@ -59,7 +62,8 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse

const { totalPoints, modelName } = formatModelChars2Points({
model: cqModel.model,
-tokens,
+inputTokens: inputTokens,
+outputTokens: outputTokens,
modelType: ModelTypeEnum.llm
});

@@ -72,7 +76,8 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
query: userChatInput,
-tokens,
+inputTokens: inputTokens,
+outputTokens: outputTokens,
cqList: agents,
cqResult: result.value,
contextTotalLen: chatHistories.length + 2
@@ -82,7 +87,8 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
moduleName: name,
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
-tokens
+inputTokens: inputTokens,
+outputTokens: outputTokens
}
]
};
@@ -148,7 +154,8 @@ const completions = async ({
}

return {
-tokens: await countMessagesTokens(messages),
+inputTokens: await countGptMessagesTokens(requestMessages),
+outputTokens: await countPromptTokens(answer),
arg: { type: id }
};
};

@@ -3,7 +3,8 @@ import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../chat/
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import {
countMessagesTokens,
-countGptMessagesTokens
+countGptMessagesTokens,
+countPromptTokens
} from '../../../../common/string/tiktoken/index';
import { ChatItemValueTypeEnum, ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { createChatCompletion } from '../../../ai/config';
@@ -59,7 +60,7 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
const extractModel = getLLMModel(model);
const chatHistories = getHistories(history, histories);

-const { arg, tokens } = await (async () => {
+const { arg, inputTokens, outputTokens } = await (async () => {
if (extractModel.toolChoice) {
return toolChoice({
...props,
@@ -114,7 +115,8 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {

const { totalPoints, modelName } = formatModelChars2Points({
model: extractModel.model,
-tokens,
+inputTokens: inputTokens,
+outputTokens: outputTokens,
modelType: ModelTypeEnum.llm
});

@@ -126,7 +128,8 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
query: content,
-tokens,
+inputTokens,
+outputTokens,
extractDescription: description,
extractResult: arg,
contextTotalLen: chatHistories.length + 2
@@ -136,7 +139,8 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
moduleName: name,
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
-tokens
+inputTokens,
+outputTokens
}
]
};
@@ -249,15 +253,18 @@ const toolChoice = async (props: ActionProps) => {
}
})();

-const completeMessages: ChatCompletionMessageParam[] = [
-...filterMessages,
+const AIMessages: ChatCompletionMessageParam[] = [
{
role: ChatCompletionRequestMessageRoleEnum.Assistant,
tool_calls: response.choices?.[0]?.message?.tool_calls
}
];

+const inputTokens = await countGptMessagesTokens(filterMessages, tools);
+const outputTokens = await countGptMessagesTokens(AIMessages);
return {
-tokens: await countGptMessagesTokens(completeMessages, tools),
+inputTokens,
+outputTokens,
arg
};
};
@@ -286,17 +293,21 @@ const functionCall = async (props: ActionProps) => {

try {
const arg = JSON.parse(response?.choices?.[0]?.message?.function_call?.arguments || '');
-const completeMessages: ChatCompletionMessageParam[] = [
-...filterMessages,
+const AIMessages: ChatCompletionMessageParam[] = [
{
role: ChatCompletionRequestMessageRoleEnum.Assistant,
function_call: response.choices?.[0]?.message?.function_call
}
];

+const inputTokens = await countGptMessagesTokens(filterMessages, undefined, functions);
+const outputTokens = await countGptMessagesTokens(AIMessages);

return {
arg,
-tokens: await countGptMessagesTokens(completeMessages, undefined, functions)
+inputTokens,
+outputTokens
};
} catch (error) {
console.log(response.choices?.[0]?.message);
@@ -305,7 +316,8 @@ const functionCall = async (props: ActionProps) => {

return {
arg: {},
-tokens: 0
+inputTokens: 0,
+outputTokens: 0
};
}
};
@@ -370,7 +382,8 @@ Human: ${content}`
if (!jsonStr) {
return {
rawResponse: answer,
-tokens: await countMessagesTokens(messages),
+inputTokens: await countMessagesTokens(messages),
+outputTokens: await countPromptTokens(answer),
arg: {}
};
}
@@ -378,7 +391,8 @@ Human: ${content}`
try {
return {
rawResponse: answer,
-tokens: await countMessagesTokens(messages),
+inputTokens: await countMessagesTokens(messages),
+outputTokens: await countPromptTokens(answer),
arg: json5.parse(jsonStr) as Record<string, any>
};
} catch (error) {
@@ -386,7 +400,8 @@ Human: ${content}`
console.log(error);
return {
rawResponse: answer,
-tokens: await countMessagesTokens(messages),
+inputTokens: await countMessagesTokens(messages),
+outputTokens: await countPromptTokens(answer),
arg: {}
};
}

@@ -109,7 +109,8 @@ export const runToolWithFunctionCall = async (

return {
dispatchFlowResponse: [toolRunResponse],
-toolNodeTokens: 0,
+toolNodeInputTokens: 0,
+toolNodeOutputTokens: 0,
completeMessages: requestMessages,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes,
@@ -126,7 +127,8 @@ export const runToolWithFunctionCall = async (
},
{
dispatchFlowResponse: [toolRunResponse],
-toolNodeTokens: 0,
+toolNodeInputTokens: 0,
+toolNodeOutputTokens: 0,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes
}
@@ -340,7 +342,9 @@ export const runToolWithFunctionCall = async (
assistantToolMsgParams
] as ChatCompletionMessageParam[];
// Only toolCall tokens are counted here, Tool response tokens count towards the next reply
-const tokens = await countGptMessagesTokens(concatToolMessages, undefined, functions);
+// const tokens = await countGptMessagesTokens(concatToolMessages, undefined, functions);
+const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions);
+const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]);
/*
...
user
@@ -375,7 +379,12 @@ export const runToolWithFunctionCall = async (
const runTimes =
(response?.runTimes || 0) +
flatToolsResponseData.reduce((sum, item) => sum + item.runTimes, 0);
-const toolNodeTokens = response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens;
+const toolNodeInputTokens = response?.toolNodeInputTokens
+? response.toolNodeInputTokens + inputTokens
+: inputTokens;
+const toolNodeOutputTokens = response?.toolNodeOutputTokens
+? response.toolNodeOutputTokens + outputTokens
+: outputTokens;

// Check stop signal
const hasStopSignal = flatToolsResponseData.some(
@@ -408,7 +417,8 @@ export const runToolWithFunctionCall = async (

return {
dispatchFlowResponse,
-toolNodeTokens,
+toolNodeInputTokens,
+toolNodeOutputTokens,
completeMessages,
assistantResponses: toolNodeAssistants,
runTimes,
@@ -423,7 +433,8 @@ export const runToolWithFunctionCall = async (
},
{
dispatchFlowResponse,
-toolNodeTokens,
+toolNodeInputTokens,
+toolNodeOutputTokens,
assistantResponses: toolNodeAssistants,
runTimes
}
@@ -435,7 +446,8 @@ export const runToolWithFunctionCall = async (
content: answer
};
const completeMessages = filterMessages.concat(gptAssistantResponse);
-const tokens = await countGptMessagesTokens(completeMessages, undefined, functions);
+const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions);
+const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
// console.log(tokens, 'response token');

// concat tool assistant
@@ -443,7 +455,12 @@ export const runToolWithFunctionCall = async (

return {
dispatchFlowResponse: response?.dispatchFlowResponse || [],
-toolNodeTokens: response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens,
+toolNodeInputTokens: response?.toolNodeInputTokens
+? response.toolNodeInputTokens + inputTokens
+: inputTokens,
+toolNodeOutputTokens: response?.toolNodeOutputTokens
+? response.toolNodeOutputTokens + outputTokens
+: outputTokens,
completeMessages,
assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
runTimes: (response?.runTimes || 0) + 1

@@ -165,6 +165,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
toolWorkflowInteractiveResponse,
dispatchFlowResponse, // tool flow response
toolNodeTokens,
+toolNodeInputTokens,
+toolNodeOutputTokens,
completeMessages = [], // The actual message sent to AI(just save text)
assistantResponses = [], // FastGPT system store assistant.value response
runTimes
@@ -225,7 +227,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<

const { totalPoints, modelName } = formatModelChars2Points({
model,
-tokens: toolNodeTokens,
+inputTokens: toolNodeInputTokens,
+outputTokens: toolNodeOutputTokens,
modelType: ModelTypeEnum.llm
});
const toolAIUsage = externalProvider.openaiAccount?.key ? 0 : totalPoints;
@@ -255,6 +258,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
// 展示的积分消耗
totalPoints: totalPointsUsage,
toolCallTokens: toolNodeTokens,
+toolCallInputTokens: toolNodeInputTokens,
+toolCallOutputTokens: toolNodeOutputTokens,
childTotalPoints: flatUsages.reduce((sum, item) => sum + item.totalPoints, 0),
model: modelName,
query: userChatInput,
@@ -270,9 +275,10 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
// 工具调用本身的积分消耗
{
moduleName: name,
-totalPoints: toolAIUsage,
-model: modelName,
-tokens: toolNodeTokens
+totalPoints: toolAIUsage,
+model: modelName,
+inputTokens: toolNodeInputTokens,
+outputTokens: toolNodeOutputTokens
},
// 工具的消耗
...flatUsages

@@ -115,7 +115,8 @@ export const runToolWithPromptCall = async (

return {
dispatchFlowResponse: [toolRunResponse],
-toolNodeTokens: 0,
+toolNodeInputTokens: 0,
+toolNodeOutputTokens: 0,
completeMessages: concatMessages,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes,
@@ -131,7 +132,8 @@ export const runToolWithPromptCall = async (
},
{
dispatchFlowResponse: [toolRunResponse],
-toolNodeTokens: 0,
+toolNodeInputTokens: 0,
+toolNodeOutputTokens: 0,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes
}
@@ -286,15 +288,20 @@ export const runToolWithPromptCall = async (
content: replaceAnswer
};
const completeMessages = filterMessages.concat(gptAssistantResponse);
-const tokens = await countGptMessagesTokens(completeMessages, undefined);
-// console.log(tokens, 'response token');
+const inputTokens = await countGptMessagesTokens(requestMessages);
+const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);

// concat tool assistant
const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType;

return {
dispatchFlowResponse: response?.dispatchFlowResponse || [],
-toolNodeTokens: response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens,
+toolNodeInputTokens: response?.toolNodeInputTokens
+? response.toolNodeInputTokens + inputTokens
+: inputTokens,
+toolNodeOutputTokens: response?.toolNodeOutputTokens
+? response.toolNodeOutputTokens + outputTokens
+: outputTokens,
completeMessages,
assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
runTimes: (response?.runTimes || 0) + 1
@@ -366,17 +373,9 @@ export const runToolWithPromptCall = async (
function_call: toolJson
};

-/*
-...
-user
-assistant: tool data
-*/
-const concatToolMessages = [
-...requestMessages,
-assistantToolMsgParams
-] as ChatCompletionMessageParam[];
-// Only toolCall tokens are counted here, Tool response tokens count towards the next reply
-const tokens = await countGptMessagesTokens(concatToolMessages, undefined);
+const inputTokens = await countGptMessagesTokens(requestMessages);
+const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]);

/*
...
@@ -437,7 +436,12 @@ ANSWER: `;
}

const runTimes = (response?.runTimes || 0) + toolsRunResponse.toolResponse.runTimes;
-const toolNodeTokens = response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens;
+const toolNodeInputTokens = response?.toolNodeInputTokens
+? response.toolNodeInputTokens + inputTokens
+: inputTokens;
+const toolNodeOutputTokens = response?.toolNodeOutputTokens
+? response.toolNodeOutputTokens + outputTokens
+: outputTokens;

// Check stop signal
const hasStopSignal = toolsRunResponse.toolResponse.flowResponses.some((item) => !!item.toolStop);
@@ -460,7 +464,8 @@ ANSWER: `;

return {
dispatchFlowResponse,
-toolNodeTokens,
+toolNodeInputTokens,
+toolNodeOutputTokens,
completeMessages: filterMessages,
assistantResponses: toolNodeAssistants,
runTimes,
@@ -475,7 +480,8 @@ ANSWER: `;
},
{
dispatchFlowResponse,
-toolNodeTokens,
+toolNodeInputTokens,
+toolNodeOutputTokens,
assistantResponses: toolNodeAssistants,
runTimes
}

@@ -158,7 +158,8 @@ export const runToolWithToolChoice = async (

return {
dispatchFlowResponse: [toolRunResponse],
-toolNodeTokens: 0,
+toolNodeInputTokens: 0,
+toolNodeOutputTokens: 0,
completeMessages: requestMessages,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes,
@@ -176,7 +177,8 @@ export const runToolWithToolChoice = async (
},
{
dispatchFlowResponse: [toolRunResponse],
-toolNodeTokens: 0,
+toolNodeInputTokens: 0,
+toolNodeOutputTokens: 0,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes
}
@@ -428,7 +430,9 @@ export const runToolWithToolChoice = async (
] as ChatCompletionMessageParam[];

// Only toolCall tokens are counted here, Tool response tokens count towards the next reply
-const tokens = await countGptMessagesTokens(concatToolMessages, tools);
+const inputTokens = await countGptMessagesTokens(requestMessages, tools);
+const outputTokens = await countGptMessagesTokens(assistantToolMsgParams);

/*
...
user
@@ -463,7 +467,10 @@ export const runToolWithToolChoice = async (
const runTimes =
(response?.runTimes || 0) +
flatToolsResponseData.reduce((sum, item) => sum + item.runTimes, 0);
-const toolNodeTokens = response ? response.toolNodeTokens + tokens : tokens;
+const toolNodeInputTokens = response ? response.toolNodeInputTokens + inputTokens : inputTokens;
+const toolNodeOutputTokens = response
+? response.toolNodeOutputTokens + outputTokens
+: outputTokens;

// Check stop signal
const hasStopSignal = flatToolsResponseData.some(
@@ -496,7 +503,8 @@ export const runToolWithToolChoice = async (

return {
dispatchFlowResponse,
-toolNodeTokens,
+toolNodeInputTokens,
+toolNodeOutputTokens,
completeMessages,
assistantResponses: toolNodeAssistants,
runTimes,
@@ -512,7 +520,8 @@ export const runToolWithToolChoice = async (
},
{
dispatchFlowResponse,
-toolNodeTokens,
+toolNodeInputTokens,
+toolNodeOutputTokens,
assistantResponses: toolNodeAssistants,
runTimes
}
@@ -524,14 +533,17 @@ export const runToolWithToolChoice = async (
content: answer
};
const completeMessages = filterMessages.concat(gptAssistantResponse);
-const tokens = await countGptMessagesTokens(completeMessages, tools);
+const inputTokens = await countGptMessagesTokens(requestMessages, tools);
+const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);

// concat tool assistant
const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType;

return {
dispatchFlowResponse: response?.dispatchFlowResponse || [],
-toolNodeTokens: response ? response.toolNodeTokens + tokens : tokens,
+toolNodeInputTokens: response ? response.toolNodeInputTokens + inputTokens : inputTokens,
+toolNodeOutputTokens: response ? response.toolNodeOutputTokens + outputTokens : outputTokens,

completeMessages,
assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
runTimes: (response?.runTimes || 0) + 1
@@ -578,7 +590,8 @@ async function streamResponse({
text: content
})
});
-} else if (responseChoice?.tool_calls?.[0]) {
+}
+if (responseChoice?.tool_calls?.[0]) {
const toolCall: ChatCompletionMessageToolCall = responseChoice.tool_calls[0];
// In a stream response, only one tool is returned at a time. If have id, description is executing a tool
if (toolCall.id || callingTool) {

@@ -31,7 +31,9 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{

export type RunToolResponse = {
dispatchFlowResponse: DispatchFlowResponse[];
-toolNodeTokens: number;
+toolNodeTokens?: number; // deprecated
+toolNodeInputTokens: number;
+toolNodeOutputTokens: number;
completeMessages?: ChatCompletionMessageParam[];
assistantResponses?: AIChatItemValueItemType[];
toolWorkflowInteractiveResponse?: WorkflowInteractiveResponseType;

@@ -5,13 +5,17 @@ import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
import { createChatCompletion } from '../../../ai/config';
-import type { ChatCompletion, StreamChatType } from '@fastgpt/global/core/ai/type.d';
+import type {
+ChatCompletion,
+ChatCompletionMessageParam,
+StreamChatType
+} from '@fastgpt/global/core/ai/type.d';
import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { postTextCensor } from '../../../../common/api/requestPlusApi';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import type { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
-import { countMessagesTokens } from '../../../../common/string/tiktoken/index';
+import { countGptMessagesTokens } from '../../../../common/string/tiktoken/index';
import {
chats2GPTMessages,
chatValue2RuntimePrompt,
@@ -214,16 +218,23 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
return Promise.reject(getEmptyResponseTip());
}

-const completeMessages = requestMessages.concat({
-role: ChatCompletionRequestMessageRoleEnum.Assistant,
-content: answerText
-});
+const AIMessages: ChatCompletionMessageParam[] = [
+{
+role: ChatCompletionRequestMessageRoleEnum.Assistant,
+content: answerText
+}
+];
+
+const completeMessages = [...requestMessages, ...AIMessages];
const chatCompleteMessages = GPTMessages2Chats(completeMessages);

-const tokens = await countMessagesTokens(chatCompleteMessages);
+const inputTokens = await countGptMessagesTokens(requestMessages);
+const outputTokens = await countGptMessagesTokens(AIMessages);

const { totalPoints, modelName } = formatModelChars2Points({
model,
-tokens,
+inputTokens,
+outputTokens,
modelType: ModelTypeEnum.llm
});

@@ -232,7 +243,9 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
[DispatchNodeResponseKeyEnum.nodeResponse]: {
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
-tokens,
+tokens: inputTokens + outputTokens,
+inputTokens: inputTokens,
+outputTokens: outputTokens,
query: `${userChatInput}`,
maxToken: max_tokens,
historyPreview: getHistoryPreview(
@@ -247,7 +260,8 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
moduleName: name,
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
-tokens
+inputTokens: inputTokens,
+outputTokens: outputTokens
}
],
[DispatchNodeResponseKeyEnum.toolResponses]: answerText,

@@ -120,14 +120,14 @@ export async function dispatchDatasetSearch(
// vector
const { totalPoints, modelName } = formatModelChars2Points({
model: vectorModel.model,
-tokens,
+inputTokens: tokens,
modelType: ModelTypeEnum.vector
});
const responseData: DispatchNodeResponseType & { totalPoints: number } = {
totalPoints,
query: concatQueries.join('\n'),
model: modelName,
-tokens,
+inputTokens: tokens,
similarity: usingSimilarityFilter ? similarity : undefined,
limit,
searchMode,
@@ -139,19 +139,21 @@ export async function dispatchDatasetSearch(
totalPoints,
moduleName: node.name,
model: modelName,
-tokens
+inputTokens: tokens
}
];

if (aiExtensionResult) {
const { totalPoints, modelName } = formatModelChars2Points({
model: aiExtensionResult.model,
-tokens: aiExtensionResult.tokens,
+inputTokens: aiExtensionResult.inputTokens,
+outputTokens: aiExtensionResult.outputTokens,
modelType: ModelTypeEnum.llm
});

responseData.totalPoints += totalPoints;
-responseData.tokens = aiExtensionResult.tokens;
+responseData.inputTokens = aiExtensionResult.inputTokens;
+responseData.outputTokens = aiExtensionResult.outputTokens;
responseData.extensionModel = modelName;
responseData.extensionResult =
aiExtensionResult.extensionQueries?.join('\n') ||
@@ -161,7 +163,8 @@ export async function dispatchDatasetSearch(
totalPoints,
moduleName: 'core.module.template.Query extension',
model: modelName,
-tokens: aiExtensionResult.tokens
+inputTokens: aiExtensionResult.inputTokens,
+outputTokens: aiExtensionResult.outputTokens
});
}

@@ -130,8 +130,7 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
[DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
{
moduleName: plugin.name,
-totalPoints: usagePoints,
-tokens: 0
+totalPoints: usagePoints
}
],
[DispatchNodeResponseKeyEnum.toolResponses]: output?.pluginOutput

@@ -153,8 +153,7 @@ export const dispatchRunAppNode = async (props: Props): Promise<Response> => {
[DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
{
moduleName: appData.name,
-totalPoints: usagePoints,
-tokens: 0
+totalPoints: usagePoints
}
],
[DispatchNodeResponseKeyEnum.toolResponses]: text,

@@ -31,7 +31,7 @@ export const dispatchQueryExtension = async ({
const queryExtensionModel = getLLMModel(model);
const chatHistories = getHistories(history, histories);

-const { extensionQueries, tokens } = await queryExtension({
+const { extensionQueries, inputTokens, outputTokens } = await queryExtension({
chatBg: systemPrompt,
query: userChatInput,
histories: chatHistories,
@@ -42,7 +42,8 @@ export const dispatchQueryExtension = async ({

const { totalPoints, modelName } = formatModelChars2Points({
model: queryExtensionModel.model,
-tokens,
+inputTokens,
+outputTokens,
modelType: ModelTypeEnum.llm
});

@@ -59,7 +60,8 @@ export const dispatchQueryExtension = async ({
[DispatchNodeResponseKeyEnum.nodeResponse]: {
totalPoints,
model: modelName,
-tokens,
+inputTokens,
+outputTokens,
query: userChatInput,
textOutput: JSON.stringify(filterSameQueries)
},
@@ -68,7 +70,8 @@ export const dispatchQueryExtension = async ({
moduleName: node.name,
totalPoints,
model: modelName,
-tokens
+inputTokens,
+outputTokens
}
],
[NodeOutputKeyEnum.text]: JSON.stringify(filterSameQueries)