V4.8.17 feature (#3493)

* split tokens into input and output (#3477)

* split tokens into input and output

* apply the input/output split to query extension, tool calls and the question guide

* fix

* perf: input and output tokens

* perf: tool call if else

* perf: remove code

* fix: extract usage count

* fix: qa usage count

---------

Co-authored-by: heheer <heheer@sealos.io>
Author: Archer
Date: 2024-12-30 10:13:25 +08:00
Committed by: GitHub
Parent: da2831b948
Commit: 50bf7f9a3b
46 changed files with 467 additions and 230 deletions
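The change repeated across these files, reduced to a minimal sketch: input tokens are counted from the messages actually sent to the model, output tokens from the assistant reply, and both are passed to formatModelChars2Points instead of a single tokens total. The wrapper below is hypothetical and imports are omitted; only the helper names and call shapes are taken from this diff.

// Minimal sketch of the new accounting pattern (hypothetical wrapper, not a file in this commit).
async function countSplitUsage(
  requestMessages: ChatCompletionMessageParam[],
  answer: string,
  model: string
) {
  // Input side: the request messages that were actually sent to the model.
  const inputTokens = await countGptMessagesTokens(requestMessages);
  // Output side: the assistant reply text only.
  const outputTokens = await countPromptTokens(answer);

  // Billing now takes the split counts instead of a single `tokens` field.
  const { totalPoints, modelName } = formatModelChars2Points({
    model,
    inputTokens,
    outputTokens,
    modelType: ModelTypeEnum.llm
  });

  return { inputTokens, outputTokens, totalPoints, modelName };
}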

View File

@@ -1,6 +1,6 @@
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d';
import { createChatCompletion } from '../config';
import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { loadRequestMessages } from '../../chat/utils';
import { llmCompletionsBodyFormat } from '../utils';
import {
@@ -20,7 +20,8 @@ export async function createQuestionGuide({
customPrompt?: string;
}): Promise<{
result: string[];
tokens: number;
inputTokens: number;
outputTokens: number;
}> {
const concatMessages: ChatCompletionMessageParam[] = [
...messages,
@@ -29,6 +30,10 @@ export async function createQuestionGuide({
content: `${customPrompt || PROMPT_QUESTION_GUIDE}\n${PROMPT_QUESTION_GUIDE_FOOTER}`
}
];
const requestMessages = await loadRequestMessages({
messages: concatMessages,
useVision: false
});
const { response: data } = await createChatCompletion({
body: llmCompletionsBodyFormat(
@@ -36,10 +41,7 @@ export async function createQuestionGuide({
model,
temperature: 0.1,
max_tokens: 200,
messages: await loadRequestMessages({
messages: concatMessages,
useVision: false
}),
messages: requestMessages,
stream: false
},
model
@@ -51,13 +53,15 @@ export async function createQuestionGuide({
const start = answer.indexOf('[');
const end = answer.lastIndexOf(']');
const tokens = await countGptMessagesTokens(concatMessages);
const inputTokens = await countGptMessagesTokens(requestMessages);
const outputTokens = await countPromptTokens(answer);
if (start === -1 || end === -1) {
addLog.warn('Create question guide error', { answer });
return {
result: [],
tokens: 0
inputTokens: 0,
outputTokens: 0
};
}
@@ -69,14 +73,16 @@ export async function createQuestionGuide({
try {
return {
result: json5.parse(jsonStr),
tokens
inputTokens,
outputTokens
};
} catch (error) {
console.log(error);
return {
result: [],
tokens: 0
inputTokens: 0,
outputTokens: 0
};
}
}

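A usage note: a hypothetical caller of the updated createQuestionGuide (argument list assumed from the visible signature) now destructures the split counts rather than a single tokens field.

// Hypothetical call site, for illustration only.
const { result, inputTokens, outputTokens } = await createQuestionGuide({
  messages,      // ChatCompletionMessageParam[]
  model,         // LLM model name
  customPrompt   // optional; falls back to PROMPT_QUESTION_GUIDE
});
// result is the parsed question list; both token counts feed usage accounting
// the same way the dispatchers below report inputTokens/outputTokens.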
View File

@@ -1,7 +1,7 @@
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { createChatCompletion } from '../config';
import { ChatItemType } from '@fastgpt/global/core/chat/type';
import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
import { getLLMModel } from '../model';
import { llmCompletionsBodyFormat } from '../utils';
@@ -121,7 +121,8 @@ export const queryExtension = async ({
rawQuery: string;
extensionQueries: string[];
model: string;
tokens: number;
inputTokens: number;
outputTokens: number;
}> => {
const systemFewShot = chatBg
? `Q: 对话背景。
@@ -166,7 +167,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: [],
model,
tokens: 0
inputTokens: 0,
outputTokens: 0
};
}
@@ -181,7 +183,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: Array.isArray(queries) ? queries : [],
model,
tokens: await countGptMessagesTokens(messages)
inputTokens: await countGptMessagesTokens(messages),
outputTokens: await countPromptTokens(answer)
};
} catch (error) {
addLog.error(`Query extension error`, error);
@@ -189,7 +192,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: [],
model,
tokens: 0
inputTokens: 0,
outputTokens: 0
};
}
};

View File

@@ -4,6 +4,7 @@ export const getLLMModel = (model?: string) => {
global.llmModels[0]
);
};
export const getDatasetModel = (model?: string) => {
return (
global.llmModels

View File

@@ -1,5 +1,8 @@
import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
import { countMessagesTokens } from '../../../../common/string/tiktoken/index';
import {
countGptMessagesTokens,
countPromptTokens
} from '../../../../common/string/tiktoken/index';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatItemValueTypeEnum, ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { createChatCompletion } from '../../../ai/config';
@@ -49,7 +52,7 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
const chatHistories = getHistories(history, histories);
const { arg, tokens } = await completions({
const { arg, inputTokens, outputTokens } = await completions({
...props,
histories: chatHistories,
cqModel
@@ -59,7 +62,8 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
const { totalPoints, modelName } = formatModelChars2Points({
model: cqModel.model,
tokens,
inputTokens: inputTokens,
outputTokens: outputTokens,
modelType: ModelTypeEnum.llm
});
@@ -72,7 +76,8 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
query: userChatInput,
tokens,
inputTokens: inputTokens,
outputTokens: outputTokens,
cqList: agents,
cqResult: result.value,
contextTotalLen: chatHistories.length + 2
@@ -82,7 +87,8 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
moduleName: name,
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
tokens
inputTokens: inputTokens,
outputTokens: outputTokens
}
]
};
@@ -148,7 +154,8 @@ const completions = async ({
}
return {
tokens: await countMessagesTokens(messages),
inputTokens: await countGptMessagesTokens(requestMessages),
outputTokens: await countPromptTokens(answer),
arg: { type: id }
};
};

View File

@@ -3,7 +3,8 @@ import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../chat/
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import {
countMessagesTokens,
countGptMessagesTokens
countGptMessagesTokens,
countPromptTokens
} from '../../../../common/string/tiktoken/index';
import { ChatItemValueTypeEnum, ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { createChatCompletion } from '../../../ai/config';
@@ -59,7 +60,7 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
const extractModel = getLLMModel(model);
const chatHistories = getHistories(history, histories);
const { arg, tokens } = await (async () => {
const { arg, inputTokens, outputTokens } = await (async () => {
if (extractModel.toolChoice) {
return toolChoice({
...props,
@@ -114,7 +115,8 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
const { totalPoints, modelName } = formatModelChars2Points({
model: extractModel.model,
tokens,
inputTokens: inputTokens,
outputTokens: outputTokens,
modelType: ModelTypeEnum.llm
});
@@ -126,7 +128,8 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
query: content,
tokens,
inputTokens,
outputTokens,
extractDescription: description,
extractResult: arg,
contextTotalLen: chatHistories.length + 2
@@ -136,7 +139,8 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
moduleName: name,
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
tokens
inputTokens,
outputTokens
}
]
};
@@ -249,15 +253,18 @@ const toolChoice = async (props: ActionProps) => {
}
})();
const completeMessages: ChatCompletionMessageParam[] = [
...filterMessages,
const AIMessages: ChatCompletionMessageParam[] = [
{
role: ChatCompletionRequestMessageRoleEnum.Assistant,
tool_calls: response.choices?.[0]?.message?.tool_calls
}
];
const inputTokens = await countGptMessagesTokens(filterMessages, tools);
const outputTokens = await countGptMessagesTokens(AIMessages);
return {
tokens: await countGptMessagesTokens(completeMessages, tools),
inputTokens,
outputTokens,
arg
};
};
@@ -286,17 +293,21 @@ const functionCall = async (props: ActionProps) => {
try {
const arg = JSON.parse(response?.choices?.[0]?.message?.function_call?.arguments || '');
const completeMessages: ChatCompletionMessageParam[] = [
...filterMessages,
const AIMessages: ChatCompletionMessageParam[] = [
{
role: ChatCompletionRequestMessageRoleEnum.Assistant,
function_call: response.choices?.[0]?.message?.function_call
}
];
const inputTokens = await countGptMessagesTokens(filterMessages, undefined, functions);
const outputTokens = await countGptMessagesTokens(AIMessages);
return {
arg,
tokens: await countGptMessagesTokens(completeMessages, undefined, functions)
inputTokens,
outputTokens
};
} catch (error) {
console.log(response.choices?.[0]?.message);
@@ -305,7 +316,8 @@ const functionCall = async (props: ActionProps) => {
return {
arg: {},
tokens: 0
inputTokens: 0,
outputTokens: 0
};
}
};
@@ -370,7 +382,8 @@ Human: ${content}`
if (!jsonStr) {
return {
rawResponse: answer,
tokens: await countMessagesTokens(messages),
inputTokens: await countMessagesTokens(messages),
outputTokens: await countPromptTokens(answer),
arg: {}
};
}
@@ -378,7 +391,8 @@ Human: ${content}`
try {
return {
rawResponse: answer,
tokens: await countMessagesTokens(messages),
inputTokens: await countMessagesTokens(messages),
outputTokens: await countPromptTokens(answer),
arg: json5.parse(jsonStr) as Record<string, any>
};
} catch (error) {
@@ -386,7 +400,8 @@ Human: ${content}`
console.log(error);
return {
rawResponse: answer,
tokens: await countMessagesTokens(messages),
inputTokens: await countMessagesTokens(messages),
outputTokens: await countPromptTokens(answer),
arg: {}
};
}

View File

@@ -109,7 +109,8 @@ export const runToolWithFunctionCall = async (
return {
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
completeMessages: requestMessages,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes,
@@ -126,7 +127,8 @@ export const runToolWithFunctionCall = async (
},
{
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes
}
@@ -340,7 +342,9 @@ export const runToolWithFunctionCall = async (
assistantToolMsgParams
] as ChatCompletionMessageParam[];
// Only toolCall tokens are counted here, Tool response tokens count towards the next reply
const tokens = await countGptMessagesTokens(concatToolMessages, undefined, functions);
// const tokens = await countGptMessagesTokens(concatToolMessages, undefined, functions);
const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions);
const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]);
/*
...
user
@@ -375,7 +379,12 @@ export const runToolWithFunctionCall = async (
const runTimes =
(response?.runTimes || 0) +
flatToolsResponseData.reduce((sum, item) => sum + item.runTimes, 0);
const toolNodeTokens = response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens;
const toolNodeInputTokens = response?.toolNodeInputTokens
? response.toolNodeInputTokens + inputTokens
: inputTokens;
const toolNodeOutputTokens = response?.toolNodeOutputTokens
? response.toolNodeOutputTokens + outputTokens
: outputTokens;
// Check stop signal
const hasStopSignal = flatToolsResponseData.some(
@@ -408,7 +417,8 @@ export const runToolWithFunctionCall = async (
return {
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
completeMessages,
assistantResponses: toolNodeAssistants,
runTimes,
@@ -423,7 +433,8 @@ export const runToolWithFunctionCall = async (
},
{
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
assistantResponses: toolNodeAssistants,
runTimes
}
@@ -435,7 +446,8 @@ export const runToolWithFunctionCall = async (
content: answer
};
const completeMessages = filterMessages.concat(gptAssistantResponse);
const tokens = await countGptMessagesTokens(completeMessages, undefined, functions);
const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions);
const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
// console.log(tokens, 'response token');
// concat tool assistant
@@ -443,7 +455,12 @@ export const runToolWithFunctionCall = async (
return {
dispatchFlowResponse: response?.dispatchFlowResponse || [],
toolNodeTokens: response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens,
toolNodeInputTokens: response?.toolNodeInputTokens
? response.toolNodeInputTokens + inputTokens
: inputTokens,
toolNodeOutputTokens: response?.toolNodeOutputTokens
? response.toolNodeOutputTokens + outputTokens
: outputTokens,
completeMessages,
assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
runTimes: (response?.runTimes || 0) + 1

View File

@@ -165,6 +165,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
toolWorkflowInteractiveResponse,
dispatchFlowResponse, // tool flow response
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
completeMessages = [], // The actual message sent to AI(just save text)
assistantResponses = [], // FastGPT system store assistant.value response
runTimes
@@ -225,7 +227,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
const { totalPoints, modelName } = formatModelChars2Points({
model,
tokens: toolNodeTokens,
inputTokens: toolNodeInputTokens,
outputTokens: toolNodeOutputTokens,
modelType: ModelTypeEnum.llm
});
const toolAIUsage = externalProvider.openaiAccount?.key ? 0 : totalPoints;
@@ -255,6 +258,8 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
// Points consumption shown to the user
totalPoints: totalPointsUsage,
toolCallTokens: toolNodeTokens,
toolCallInputTokens: toolNodeInputTokens,
toolCallOutputTokens: toolNodeOutputTokens,
childTotalPoints: flatUsages.reduce((sum, item) => sum + item.totalPoints, 0),
model: modelName,
query: userChatInput,
@@ -270,9 +275,10 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
// Points consumed by the tool call itself
{
moduleName: name,
totalPoints: toolAIUsage,
model: modelName,
tokens: toolNodeTokens
totalPoints: toolAIUsage,
inputTokens: toolNodeInputTokens,
outputTokens: toolNodeOutputTokens
},
// Points consumed by the tools
...flatUsages

View File

@@ -115,7 +115,8 @@ export const runToolWithPromptCall = async (
return {
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
completeMessages: concatMessages,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes,
@@ -131,7 +132,8 @@ export const runToolWithPromptCall = async (
},
{
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes
}
@@ -286,15 +288,20 @@ export const runToolWithPromptCall = async (
content: replaceAnswer
};
const completeMessages = filterMessages.concat(gptAssistantResponse);
const tokens = await countGptMessagesTokens(completeMessages, undefined);
// console.log(tokens, 'response token');
const inputTokens = await countGptMessagesTokens(requestMessages);
const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
// concat tool assistant
const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType;
return {
dispatchFlowResponse: response?.dispatchFlowResponse || [],
toolNodeTokens: response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens,
toolNodeInputTokens: response?.toolNodeInputTokens
? response.toolNodeInputTokens + inputTokens
: inputTokens,
toolNodeOutputTokens: response?.toolNodeOutputTokens
? response.toolNodeOutputTokens + outputTokens
: outputTokens,
completeMessages,
assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
runTimes: (response?.runTimes || 0) + 1
@@ -366,17 +373,9 @@ export const runToolWithPromptCall = async (
function_call: toolJson
};
/*
...
user
assistant: tool data
*/
const concatToolMessages = [
...requestMessages,
assistantToolMsgParams
] as ChatCompletionMessageParam[];
// Only toolCall tokens are counted here, Tool response tokens count towards the next reply
const tokens = await countGptMessagesTokens(concatToolMessages, undefined);
const inputTokens = await countGptMessagesTokens(requestMessages);
const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]);
/*
...
@@ -437,7 +436,12 @@ ANSWER: `;
}
const runTimes = (response?.runTimes || 0) + toolsRunResponse.toolResponse.runTimes;
const toolNodeTokens = response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens;
const toolNodeInputTokens = response?.toolNodeInputTokens
? response.toolNodeInputTokens + inputTokens
: inputTokens;
const toolNodeOutputTokens = response?.toolNodeOutputTokens
? response.toolNodeOutputTokens + outputTokens
: outputTokens;
// Check stop signal
const hasStopSignal = toolsRunResponse.toolResponse.flowResponses.some((item) => !!item.toolStop);
@@ -460,7 +464,8 @@ ANSWER: `;
return {
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
completeMessages: filterMessages,
assistantResponses: toolNodeAssistants,
runTimes,
@@ -475,7 +480,8 @@ ANSWER: `;
},
{
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
assistantResponses: toolNodeAssistants,
runTimes
}

View File

@@ -158,7 +158,8 @@ export const runToolWithToolChoice = async (
return {
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
completeMessages: requestMessages,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes,
@@ -176,7 +177,8 @@ export const runToolWithToolChoice = async (
},
{
dispatchFlowResponse: [toolRunResponse],
toolNodeTokens: 0,
toolNodeInputTokens: 0,
toolNodeOutputTokens: 0,
assistantResponses: toolRunResponse.assistantResponses,
runTimes: toolRunResponse.runTimes
}
@@ -428,7 +430,9 @@ export const runToolWithToolChoice = async (
] as ChatCompletionMessageParam[];
// Only toolCall tokens are counted here, Tool response tokens count towards the next reply
const tokens = await countGptMessagesTokens(concatToolMessages, tools);
const inputTokens = await countGptMessagesTokens(requestMessages, tools);
const outputTokens = await countGptMessagesTokens(assistantToolMsgParams);
/*
...
user
@@ -463,7 +467,10 @@ export const runToolWithToolChoice = async (
const runTimes =
(response?.runTimes || 0) +
flatToolsResponseData.reduce((sum, item) => sum + item.runTimes, 0);
const toolNodeTokens = response ? response.toolNodeTokens + tokens : tokens;
const toolNodeInputTokens = response ? response.toolNodeInputTokens + inputTokens : inputTokens;
const toolNodeOutputTokens = response
? response.toolNodeOutputTokens + outputTokens
: outputTokens;
// Check stop signal
const hasStopSignal = flatToolsResponseData.some(
@@ -496,7 +503,8 @@ export const runToolWithToolChoice = async (
return {
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
completeMessages,
assistantResponses: toolNodeAssistants,
runTimes,
@@ -512,7 +520,8 @@ export const runToolWithToolChoice = async (
},
{
dispatchFlowResponse,
toolNodeTokens,
toolNodeInputTokens,
toolNodeOutputTokens,
assistantResponses: toolNodeAssistants,
runTimes
}
@@ -524,14 +533,17 @@ export const runToolWithToolChoice = async (
content: answer
};
const completeMessages = filterMessages.concat(gptAssistantResponse);
const tokens = await countGptMessagesTokens(completeMessages, tools);
const inputTokens = await countGptMessagesTokens(requestMessages, tools);
const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
// concat tool assistant
const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType;
return {
dispatchFlowResponse: response?.dispatchFlowResponse || [],
toolNodeTokens: response ? response.toolNodeTokens + tokens : tokens,
toolNodeInputTokens: response ? response.toolNodeInputTokens + inputTokens : inputTokens,
toolNodeOutputTokens: response ? response.toolNodeOutputTokens + outputTokens : outputTokens,
completeMessages,
assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
runTimes: (response?.runTimes || 0) + 1
@@ -578,7 +590,8 @@ async function streamResponse({
text: content
})
});
} else if (responseChoice?.tool_calls?.[0]) {
}
if (responseChoice?.tool_calls?.[0]) {
const toolCall: ChatCompletionMessageToolCall = responseChoice.tool_calls[0];
// In a stream response, only one tool is returned at a time. If have id, description is executing a tool
if (toolCall.id || callingTool) {

View File

@@ -31,7 +31,9 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{
export type RunToolResponse = {
dispatchFlowResponse: DispatchFlowResponse[];
toolNodeTokens: number;
toolNodeTokens?: number; // deprecated
toolNodeInputTokens: number;
toolNodeOutputTokens: number;
completeMessages?: ChatCompletionMessageParam[];
assistantResponses?: AIChatItemValueItemType[];
toolWorkflowInteractiveResponse?: WorkflowInteractiveResponseType;

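The runners above fold each round's counts into the response carried between iterations. Functionally this is equivalent to the sketch below (the helper itself is hypothetical):

// Sketch: accumulate per-round counts into the running RunToolResponse totals.
function accumulateToolTokens(
  response: RunToolResponse | undefined,
  inputTokens: number,
  outputTokens: number
) {
  return {
    toolNodeInputTokens: (response?.toolNodeInputTokens ?? 0) + inputTokens,
    toolNodeOutputTokens: (response?.toolNodeOutputTokens ?? 0) + outputTokens
  };
}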
View File

@@ -5,13 +5,17 @@ import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
import { createChatCompletion } from '../../../ai/config';
import type { ChatCompletion, StreamChatType } from '@fastgpt/global/core/ai/type.d';
import type {
ChatCompletion,
ChatCompletionMessageParam,
StreamChatType
} from '@fastgpt/global/core/ai/type.d';
import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { postTextCensor } from '../../../../common/api/requestPlusApi';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import type { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
import { countMessagesTokens } from '../../../../common/string/tiktoken/index';
import { countGptMessagesTokens } from '../../../../common/string/tiktoken/index';
import {
chats2GPTMessages,
chatValue2RuntimePrompt,
@@ -214,16 +218,23 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
return Promise.reject(getEmptyResponseTip());
}
const completeMessages = requestMessages.concat({
role: ChatCompletionRequestMessageRoleEnum.Assistant,
content: answerText
});
const AIMessages: ChatCompletionMessageParam[] = [
{
role: ChatCompletionRequestMessageRoleEnum.Assistant,
content: answerText
}
];
const completeMessages = [...requestMessages, ...AIMessages];
const chatCompleteMessages = GPTMessages2Chats(completeMessages);
const tokens = await countMessagesTokens(chatCompleteMessages);
const inputTokens = await countGptMessagesTokens(requestMessages);
const outputTokens = await countGptMessagesTokens(AIMessages);
const { totalPoints, modelName } = formatModelChars2Points({
model,
tokens,
inputTokens,
outputTokens,
modelType: ModelTypeEnum.llm
});
@@ -232,7 +243,9 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
[DispatchNodeResponseKeyEnum.nodeResponse]: {
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
tokens,
tokens: inputTokens + outputTokens,
inputTokens: inputTokens,
outputTokens: outputTokens,
query: `${userChatInput}`,
maxToken: max_tokens,
historyPreview: getHistoryPreview(
@@ -247,7 +260,8 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
moduleName: name,
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
tokens
inputTokens: inputTokens,
outputTokens: outputTokens
}
],
[DispatchNodeResponseKeyEnum.toolResponses]: answerText,

View File

@@ -120,14 +120,14 @@ export async function dispatchDatasetSearch(
// vector
const { totalPoints, modelName } = formatModelChars2Points({
model: vectorModel.model,
tokens,
inputTokens: tokens,
modelType: ModelTypeEnum.vector
});
const responseData: DispatchNodeResponseType & { totalPoints: number } = {
totalPoints,
query: concatQueries.join('\n'),
model: modelName,
tokens,
inputTokens: tokens,
similarity: usingSimilarityFilter ? similarity : undefined,
limit,
searchMode,
@@ -139,19 +139,21 @@ export async function dispatchDatasetSearch(
totalPoints,
moduleName: node.name,
model: modelName,
tokens
inputTokens: tokens
}
];
if (aiExtensionResult) {
const { totalPoints, modelName } = formatModelChars2Points({
model: aiExtensionResult.model,
tokens: aiExtensionResult.tokens,
inputTokens: aiExtensionResult.inputTokens,
outputTokens: aiExtensionResult.outputTokens,
modelType: ModelTypeEnum.llm
});
responseData.totalPoints += totalPoints;
responseData.tokens = aiExtensionResult.tokens;
responseData.inputTokens = aiExtensionResult.inputTokens;
responseData.outputTokens = aiExtensionResult.outputTokens;
responseData.extensionModel = modelName;
responseData.extensionResult =
aiExtensionResult.extensionQueries?.join('\n') ||
@@ -161,7 +163,8 @@ export async function dispatchDatasetSearch(
totalPoints,
moduleName: 'core.module.template.Query extension',
model: modelName,
tokens: aiExtensionResult.tokens
inputTokens: aiExtensionResult.inputTokens,
outputTokens: aiExtensionResult.outputTokens
});
}

View File

@@ -130,8 +130,7 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
[DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
{
moduleName: plugin.name,
totalPoints: usagePoints,
tokens: 0
totalPoints: usagePoints
}
],
[DispatchNodeResponseKeyEnum.toolResponses]: output?.pluginOutput

View File

@@ -153,8 +153,7 @@ export const dispatchRunAppNode = async (props: Props): Promise<Response> => {
[DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
{
moduleName: appData.name,
totalPoints: usagePoints,
tokens: 0
totalPoints: usagePoints
}
],
[DispatchNodeResponseKeyEnum.toolResponses]: text,

View File

@@ -31,7 +31,7 @@ export const dispatchQueryExtension = async ({
const queryExtensionModel = getLLMModel(model);
const chatHistories = getHistories(history, histories);
const { extensionQueries, tokens } = await queryExtension({
const { extensionQueries, inputTokens, outputTokens } = await queryExtension({
chatBg: systemPrompt,
query: userChatInput,
histories: chatHistories,
@@ -42,7 +42,8 @@ export const dispatchQueryExtension = async ({
const { totalPoints, modelName } = formatModelChars2Points({
model: queryExtensionModel.model,
tokens,
inputTokens,
outputTokens,
modelType: ModelTypeEnum.llm
});
@@ -59,7 +60,8 @@ export const dispatchQueryExtension = async ({
[DispatchNodeResponseKeyEnum.nodeResponse]: {
totalPoints,
model: modelName,
tokens,
inputTokens,
outputTokens,
query: userChatInput,
textOutput: JSON.stringify(filterSameQueries)
},
@@ -68,7 +70,8 @@ export const dispatchQueryExtension = async ({
moduleName: node.name,
totalPoints,
model: modelName,
tokens
inputTokens,
outputTokens
}
],
[NodeOutputKeyEnum.text]: JSON.stringify(filterSameQueries)