diff --git a/docSite/content/zh-cn/docs/development/upgrading/497.md b/docSite/content/zh-cn/docs/development/upgrading/497.md index 6b966525b..b0c22695c 100644 --- a/docSite/content/zh-cn/docs/development/upgrading/497.md +++ b/docSite/content/zh-cn/docs/development/upgrading/497.md @@ -24,6 +24,7 @@ weight: 793 2. 调整 PG vector 查询语句,强制使用向量索引。 3. 对话时间统计,准确返回工作流整体运行时间。 4. 从 ai_proxy 获取音频解析时长。 +5. AI 模型 Token 值均优先采用 API usage,确保 tokens 值准确,若为空,则再采用 GPT3.5 的估算方式。 ## 🐛 修复 diff --git a/packages/global/core/ai/constants.ts b/packages/global/core/ai/constants.ts index 538fc1098..358a66118 100644 --- a/packages/global/core/ai/constants.ts +++ b/packages/global/core/ai/constants.ts @@ -1,4 +1,13 @@ import { i18nT } from '../../../web/i18n/utils'; +import type { CompletionUsage } from './type'; + +export const getLLMDefaultUsage = (): CompletionUsage => { + return { + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0 + }; +}; export enum ChatCompletionRequestMessageRoleEnum { 'System' = 'system', diff --git a/packages/global/core/ai/type.d.ts b/packages/global/core/ai/type.d.ts index c7d1fa740..ce77cc207 100644 --- a/packages/global/core/ai/type.d.ts +++ b/packages/global/core/ai/type.d.ts @@ -10,6 +10,7 @@ import type { } from 'openai/resources'; import { ChatMessageTypeEnum } from './constants'; import { WorkflowInteractiveResponseType } from '../workflow/template/system/interactive/type'; +import { Stream } from 'openai/streaming'; export * from 'openai/resources'; // Extension of ChatCompletionMessageParam, Add file url type @@ -84,6 +85,7 @@ export type CompletionFinishReason = export default openai; export * from 'openai'; +export type { Stream }; // Other export type PromptTemplateItem = { diff --git a/packages/global/core/chat/utils.ts b/packages/global/core/chat/utils.ts index 5423d39ff..9333ec278 100644 --- a/packages/global/core/chat/utils.ts +++ b/packages/global/core/chat/utils.ts @@ -185,7 +185,6 @@ export const mergeChatResponseData = ( runningTime: +((lastResponse.runningTime || 0) + (curr.runningTime || 0)).toFixed(2), totalPoints: (lastResponse.totalPoints || 0) + (curr.totalPoints || 0), childTotalPoints: (lastResponse.childTotalPoints || 0) + (curr.childTotalPoints || 0), - toolCallTokens: (lastResponse.toolCallTokens || 0) + (curr.toolCallTokens || 0), toolDetail: [...(lastResponse.toolDetail || []), ...(curr.toolDetail || [])], loopDetail: [...(lastResponse.loopDetail || []), ...(curr.loopDetail || [])], pluginDetail: [...(lastResponse.pluginDetail || []), ...(curr.pluginDetail || [])] diff --git a/packages/global/core/workflow/runtime/type.d.ts b/packages/global/core/workflow/runtime/type.d.ts index 6631cd297..343bb4075 100644 --- a/packages/global/core/workflow/runtime/type.d.ts +++ b/packages/global/core/workflow/runtime/type.d.ts @@ -186,7 +186,6 @@ export type DispatchNodeResponseType = { ifElseResult?: string; // tool - toolCallTokens?: number; toolCallInputTokens?: number; toolCallOutputTokens?: number; toolDetail?: ChatHistoryItemResType[]; diff --git a/packages/service/core/ai/config.ts b/packages/service/core/ai/config.ts index e1af8b173..f4f93ea98 100644 --- a/packages/service/core/ai/config.ts +++ b/packages/service/core/ai/config.ts @@ -1,5 +1,5 @@ import OpenAI from '@fastgpt/global/core/ai'; -import { +import type { ChatCompletionCreateParamsNonStreaming, ChatCompletionCreateParamsStreaming, StreamChatType, diff --git a/packages/service/core/ai/functions/createQuestionGuide.ts b/packages/service/core/ai/functions/createQuestionGuide.ts index 
b096b6f32..d8eca0def 100644 --- a/packages/service/core/ai/functions/createQuestionGuide.ts +++ b/packages/service/core/ai/functions/createQuestionGuide.ts @@ -2,7 +2,7 @@ import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d' import { createChatCompletion } from '../config'; import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index'; import { loadRequestMessages } from '../../chat/utils'; -import { llmCompletionsBodyFormat } from '../utils'; +import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils'; import { QuestionGuidePrompt, QuestionGuideFooterPrompt @@ -35,7 +35,7 @@ export async function createQuestionGuide({ useVision: false }); - const { response: data } = await createChatCompletion({ + const { response } = await createChatCompletion({ body: llmCompletionsBodyFormat( { model, @@ -47,21 +47,20 @@ export async function createQuestionGuide({ model ) }); - - const answer = data.choices?.[0]?.message?.content || ''; + const { text: answer, usage } = await llmResponseToAnswerText(response); const start = answer.indexOf('['); const end = answer.lastIndexOf(']'); - const inputTokens = await countGptMessagesTokens(requestMessages); - const outputTokens = await countPromptTokens(answer); + const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages)); + const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer)); if (start === -1 || end === -1) { addLog.warn('Create question guide error', { answer }); return { result: [], - inputTokens: 0, - outputTokens: 0 + inputTokens, + outputTokens }; } @@ -81,8 +80,8 @@ export async function createQuestionGuide({ return { result: [], - inputTokens: 0, - outputTokens: 0 + inputTokens, + outputTokens }; } } diff --git a/packages/service/core/ai/functions/queryExtension.ts b/packages/service/core/ai/functions/queryExtension.ts index c4b85ffcd..c94a8acb4 100644 --- a/packages/service/core/ai/functions/queryExtension.ts +++ b/packages/service/core/ai/functions/queryExtension.ts @@ -4,7 +4,7 @@ import { ChatItemType } from '@fastgpt/global/core/chat/type'; import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index'; import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt'; import { getLLMModel } from '../model'; -import { llmCompletionsBodyFormat } from '../utils'; +import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils'; import { addLog } from '../../../common/system/log'; import { filterGPTMessageByMaxContext } from '../../chat/utils'; import json5 from 'json5'; @@ -167,7 +167,7 @@ assistant: ${chatBg} } ] as any; - const { response: result } = await createChatCompletion({ + const { response } = await createChatCompletion({ body: llmCompletionsBodyFormat( { stream: false, @@ -178,15 +178,17 @@ assistant: ${chatBg} modelData ) }); + const { text: answer, usage } = await llmResponseToAnswerText(response); + const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(messages)); + const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer)); - let answer = result.choices?.[0]?.message?.content || ''; if (!answer) { return { rawQuery: query, extensionQueries: [], model, - inputTokens: 0, - outputTokens: 0 + inputTokens: inputTokens, + outputTokens: outputTokens }; } @@ -200,8 +202,8 @@ assistant: ${chatBg} rawQuery: query, extensionQueries: [], model, - inputTokens: 0, - outputTokens: 0 + inputTokens: inputTokens, + 
outputTokens: outputTokens }; } @@ -218,8 +220,8 @@ assistant: ${chatBg} rawQuery: query, extensionQueries: (Array.isArray(queries) ? queries : []).slice(0, 5), model, - inputTokens: await countGptMessagesTokens(messages), - outputTokens: await countPromptTokens(answer) + inputTokens, + outputTokens }; } catch (error) { addLog.warn('Query extension failed, not a valid JSON', { @@ -229,8 +231,8 @@ assistant: ${chatBg} rawQuery: query, extensionQueries: [], model, - inputTokens: 0, - outputTokens: 0 + inputTokens, + outputTokens }; } }; diff --git a/packages/service/core/ai/utils.ts b/packages/service/core/ai/utils.ts index 3cae8b086..161b8e21c 100644 --- a/packages/service/core/ai/utils.ts +++ b/packages/service/core/ai/utils.ts @@ -3,9 +3,12 @@ import { ChatCompletionCreateParamsNonStreaming, ChatCompletionCreateParamsStreaming, CompletionFinishReason, - StreamChatType + StreamChatType, + UnStreamChatType, + CompletionUsage } from '@fastgpt/global/core/ai/type'; import { getLLMModel } from './model'; +import { getLLMDefaultUsage } from '@fastgpt/global/core/ai/constants'; /* Count response max token @@ -97,13 +100,42 @@ export const llmCompletionsBodyFormat = ( return requestBody as unknown as InferCompletionsBody; }; -export const llmStreamResponseToAnswerText = async (response: StreamChatType) => { +export const llmStreamResponseToAnswerText = async ( + response: StreamChatType +): Promise<{ + text: string; + usage?: CompletionUsage; +}> => { let answer = ''; + let usage = getLLMDefaultUsage(); for await (const part of response) { + usage = part.usage || usage; + const content = part.choices?.[0]?.delta?.content || ''; answer += content; } - return parseReasoningContent(answer)[1]; + return { + text: parseReasoningContent(answer)[1], + usage + }; +}; +export const llmUnStreamResponseToAnswerText = async ( + response: UnStreamChatType +): Promise<{ + text: string; + usage?: CompletionUsage; +}> => { + const answer = response.choices?.[0]?.message?.content || ''; + return { + text: answer, + usage: response.usage + }; +}; +export const llmResponseToAnswerText = async (response: StreamChatType | UnStreamChatType) => { + if ('iterator' in response) { + return llmStreamResponseToAnswerText(response); + } + return llmUnStreamResponseToAnswerText(response); }; // Parse tags to think and answer - unstream response @@ -140,7 +172,7 @@ export const parseReasoningStreamContent = () => { part: { choices: { delta: { - content?: string; + content?: string | null; reasoning_content?: string; }; finish_reason?: CompletionFinishReason; diff --git a/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts b/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts index e0b608e83..ec54bf5e9 100644 --- a/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts +++ b/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts @@ -19,7 +19,7 @@ import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/ty import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt'; import { getHandleId } from '@fastgpt/global/core/workflow/utils'; import { loadRequestMessages } from '../../../chat/utils'; -import { llmCompletionsBodyFormat } from '../../../ai/utils'; +import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../../../ai/utils'; import { addLog } from '../../../../common/system/log'; import { ModelTypeEnum } from '../../../../../global/core/ai/model'; import { replaceVariable } from '@fastgpt/global/common/string/tools'; @@ -129,7 
+129,7 @@ const completions = async ({ useVision: false }); - const { response: data } = await createChatCompletion({ + const { response } = await createChatCompletion({ body: llmCompletionsBodyFormat( { model: cqModel.model, @@ -141,7 +141,7 @@ const completions = async ({ ), userKey: externalProvider.openaiAccount }); - const answer = data.choices?.[0].message?.content || ''; + const { text: answer, usage } = await llmResponseToAnswerText(response); // console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2)); // console.log(answer, '----'); @@ -156,8 +156,8 @@ const completions = async ({ } return { - inputTokens: await countGptMessagesTokens(requestMessages), - outputTokens: await countPromptTokens(answer), + inputTokens: usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages)), + outputTokens: usage?.completion_tokens || (await countPromptTokens(answer)), arg: { type: id } }; }; diff --git a/packages/service/core/workflow/dispatch/agent/extract.ts b/packages/service/core/workflow/dispatch/agent/extract.ts index e3886f7e2..2d4b682a4 100644 --- a/packages/service/core/workflow/dispatch/agent/extract.ts +++ b/packages/service/core/workflow/dispatch/agent/extract.ts @@ -23,14 +23,14 @@ import { getLLMModel } from '../../../ai/model'; import { formatModelChars2Points } from '../../../../support/wallet/usage/utils'; import json5 from 'json5'; import { - ChatCompletionCreateParams, ChatCompletionMessageParam, - ChatCompletionTool + ChatCompletionTool, + UnStreamChatType } from '@fastgpt/global/core/ai/type'; import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type'; import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt'; -import { llmCompletionsBodyFormat } from '../../../ai/utils'; +import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../../../ai/utils'; import { ModelTypeEnum } from '../../../../../global/core/ai/model'; import { getExtractJsonPrompt, @@ -76,13 +76,6 @@ export async function dispatchContentExtract(props: Props): Promise { extractModel }); } - if (extractModel.functionCall) { - return functionCall({ - ...props, - histories: chatHistories, - extractModel - }); - } return completions({ ...props, histories: chatHistories, @@ -233,9 +226,10 @@ const toolChoice = async (props: ActionProps) => { } ]; - const { response } = await createChatCompletion({ + const { response } = (await createChatCompletion({ body: llmCompletionsBodyFormat( { + stream: false, model: extractModel.model, temperature: 0.01, messages: filterMessages, @@ -245,7 +239,7 @@ const toolChoice = async (props: ActionProps) => { extractModel ), userKey: externalProvider.openaiAccount - }); + })) as { response: UnStreamChatType }; const arg: Record = (() => { try { @@ -267,8 +261,9 @@ const toolChoice = async (props: ActionProps) => { } ]; - const inputTokens = await countGptMessagesTokens(filterMessages, tools); - const outputTokens = await countGptMessagesTokens(AIMessages); + const usage = response.usage; + const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(filterMessages, tools)); + const outputTokens = usage?.completion_tokens || (await countGptMessagesTokens(AIMessages)); return { inputTokens, outputTokens, @@ -276,59 +271,6 @@ const toolChoice = async (props: ActionProps) => { }; }; -const functionCall = async (props: ActionProps) => { - const { externalProvider, extractModel } = props; - - const 
{ agentFunction, filterMessages } = await getFunctionCallSchema(props); - const functions: ChatCompletionCreateParams.Function[] = [agentFunction]; - - const { response } = await createChatCompletion({ - body: llmCompletionsBodyFormat( - { - model: extractModel.model, - temperature: 0.01, - messages: filterMessages, - function_call: { - name: agentFunName - }, - functions - }, - extractModel - ), - userKey: externalProvider.openaiAccount - }); - - try { - const arg = JSON.parse(response?.choices?.[0]?.message?.function_call?.arguments || ''); - - const AIMessages: ChatCompletionMessageParam[] = [ - { - role: ChatCompletionRequestMessageRoleEnum.Assistant, - function_call: response.choices?.[0]?.message?.function_call - } - ]; - - const inputTokens = await countGptMessagesTokens(filterMessages, undefined, functions); - const outputTokens = await countGptMessagesTokens(AIMessages); - - return { - arg, - inputTokens, - outputTokens - }; - } catch (error) { - console.log(response.choices?.[0]?.message); - - console.log('Your model may not support toll_call', error); - - return { - arg: {}, - inputTokens: 0, - outputTokens: 0 - }; - } -}; - const completions = async ({ extractModel, externalProvider, @@ -373,7 +315,7 @@ Human: ${content}` useVision: false }); - const { response: data } = await createChatCompletion({ + const { response } = await createChatCompletion({ body: llmCompletionsBodyFormat( { model: extractModel.model, @@ -385,7 +327,9 @@ Human: ${content}` ), userKey: externalProvider.openaiAccount }); - const answer = data.choices?.[0].message?.content || ''; + const { text: answer, usage } = await llmResponseToAnswerText(response); + const inputTokens = usage?.prompt_tokens || (await countMessagesTokens(messages)); + const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer)); // parse response const jsonStr = sliceJsonStr(answer); @@ -393,8 +337,8 @@ Human: ${content}` if (!jsonStr) { return { rawResponse: answer, - inputTokens: await countMessagesTokens(messages), - outputTokens: await countPromptTokens(answer), + inputTokens, + outputTokens, arg: {} }; } @@ -402,8 +346,8 @@ Human: ${content}` try { return { rawResponse: answer, - inputTokens: await countMessagesTokens(messages), - outputTokens: await countPromptTokens(answer), + inputTokens, + outputTokens, arg: json5.parse(jsonStr) as Record }; } catch (error) { @@ -411,8 +355,8 @@ Human: ${content}` console.log(error); return { rawResponse: answer, - inputTokens: await countMessagesTokens(messages), - outputTokens: await countPromptTokens(answer), + inputTokens, + outputTokens, arg: {} }; } diff --git a/packages/service/core/workflow/dispatch/agent/runTool/functionCall.ts b/packages/service/core/workflow/dispatch/agent/runTool/functionCall.ts index 1ae995a13..daf074efb 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/functionCall.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/functionCall.ts @@ -13,7 +13,10 @@ import { NextApiResponse } from 'next'; import { responseWriteController } from '../../../../../common/response'; import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants'; import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils'; -import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; +import { + ChatCompletionRequestMessageRoleEnum, + getLLMDefaultUsage +} from '@fastgpt/global/core/ai/constants'; import { dispatchWorkFlow } from '../../index'; import { 
DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d'; import json5 from 'json5'; @@ -244,17 +247,34 @@ export const runToolWithFunctionCall = async ( } }); - const { answer, functionCalls } = await (async () => { - if (res && isStreamResponse) { - return streamResponse({ + let { answer, functionCalls, inputTokens, outputTokens } = await (async () => { + if (isStreamResponse) { + if (!res || res.closed) { + return { + answer: '', + functionCalls: [], + inputTokens: 0, + outputTokens: 0 + }; + } + const result = await streamResponse({ res, toolNodes, stream: aiResponse, workflowStreamResponse }); + + return { + answer: result.answer, + functionCalls: result.functionCalls, + inputTokens: result.usage.prompt_tokens, + outputTokens: result.usage.completion_tokens + }; } else { const result = aiResponse as ChatCompletion; const function_call = result.choices?.[0]?.message?.function_call; + const usage = result.usage; + const toolNode = toolNodes.find((node) => node.nodeId === function_call?.name); const toolCalls = function_call @@ -270,7 +290,9 @@ export const runToolWithFunctionCall = async ( return { answer: result.choices?.[0]?.message?.content || '', - functionCalls: toolCalls + functionCalls: toolCalls, + inputTokens: usage?.prompt_tokens, + outputTokens: usage?.completion_tokens }; } })(); @@ -338,7 +360,7 @@ export const runToolWithFunctionCall = async ( : flatToolsResponseData; const functionCall = functionCalls[0]; - if (functionCall && !res?.closed) { + if (functionCall) { // Run the tool, combine its results, and perform another round of AI calls const assistantToolMsgParams: ChatCompletionAssistantMessageParam = { role: ChatCompletionRequestMessageRoleEnum.Assistant, @@ -356,8 +378,9 @@ export const runToolWithFunctionCall = async ( ] as ChatCompletionMessageParam[]; // Only toolCall tokens are counted here, Tool response tokens count towards the next reply // const tokens = await countGptMessagesTokens(concatToolMessages, undefined, functions); - const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions); - const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]); + inputTokens = + inputTokens || (await countGptMessagesTokens(requestMessages, undefined, functions)); + outputTokens = outputTokens || (await countGptMessagesTokens([assistantToolMsgParams])); /* ... 
user @@ -459,8 +482,9 @@ export const runToolWithFunctionCall = async ( content: answer }; const completeMessages = filterMessages.concat(gptAssistantResponse); - const inputTokens = await countGptMessagesTokens(requestMessages, undefined, functions); - const outputTokens = await countGptMessagesTokens([gptAssistantResponse]); + inputTokens = + inputTokens || (await countGptMessagesTokens(requestMessages, undefined, functions)); + outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse])); // console.log(tokens, 'response token'); // concat tool assistant @@ -500,8 +524,10 @@ async function streamResponse({ let textAnswer = ''; let functionCalls: ChatCompletionMessageFunctionCall[] = []; let functionId = getNanoid(); + let usage = getLLMDefaultUsage(); for await (const part of stream) { + usage = part.usage || usage; if (res.closed) { stream.controller?.abort(); break; @@ -522,7 +548,7 @@ async function streamResponse({ }); } else if (responseChoice.function_call) { const functionCall: { - arguments: string; + arguments?: string; name?: string; } = responseChoice.function_call; @@ -532,11 +558,9 @@ async function streamResponse({ const toolNode = toolNodes.find((item) => item.nodeId === functionCall?.name); if (toolNode) { - if (functionCall?.arguments === undefined) { - functionCall.arguments = ''; - } functionCalls.push({ ...functionCall, + arguments: functionCall.arguments || '', id: functionId, name: functionCall.name, toolName: toolNode.name, @@ -552,7 +576,7 @@ async function streamResponse({ toolName: toolNode.name, toolAvatar: toolNode.avatar, functionName: functionCall.name, - params: functionCall.arguments, + params: functionCall.arguments || '', response: '' } } @@ -585,5 +609,5 @@ async function streamResponse({ } } - return { answer: textAnswer, functionCalls }; + return { answer: textAnswer, functionCalls, usage }; } diff --git a/packages/service/core/workflow/dispatch/agent/runTool/index.ts b/packages/service/core/workflow/dispatch/agent/runTool/index.ts index 7850e2eb3..6e7d09f07 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/index.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/index.ts @@ -171,7 +171,6 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise< const { toolWorkflowInteractiveResponse, dispatchFlowResponse, // tool flow response - toolNodeTokens, toolNodeInputTokens, toolNodeOutputTokens, completeMessages = [], // The actual message sent to AI(just save text) @@ -271,7 +270,6 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise< [DispatchNodeResponseKeyEnum.nodeResponse]: { // 展示的积分消耗 totalPoints: totalPointsUsage, - toolCallTokens: toolNodeTokens, toolCallInputTokens: toolNodeInputTokens, toolCallOutputTokens: toolNodeOutputTokens, childTotalPoints: flatUsages.reduce((sum, item) => sum + item.totalPoints, 0), diff --git a/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts b/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts index 808006a7f..215f79ed2 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts @@ -9,7 +9,10 @@ import { NextApiResponse } from 'next'; import { responseWriteController } from '../../../../../common/response'; import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants'; import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils'; -import 
{ ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; +import { + ChatCompletionRequestMessageRoleEnum, + getLLMDefaultUsage +} from '@fastgpt/global/core/ai/constants'; import { dispatchWorkFlow } from '../../index'; import { DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d'; import json5 from 'json5'; @@ -256,9 +259,18 @@ export const runToolWithPromptCall = async ( } }); - const { answer, reasoning, finish_reason } = await (async () => { - if (res && isStreamResponse) { - const { answer, reasoning, finish_reason } = await streamResponse({ + let { answer, reasoning, finish_reason, inputTokens, outputTokens } = await (async () => { + if (isStreamResponse) { + if (!res || res.closed) { + return { + answer: '', + reasoning: '', + finish_reason: 'close' as const, + inputTokens: 0, + outputTokens: 0 + }; + } + const { answer, reasoning, finish_reason, usage } = await streamResponse({ res, toolNodes, stream: aiResponse, @@ -266,18 +278,28 @@ export const runToolWithPromptCall = async ( aiChatReasoning }); - return { answer, reasoning, finish_reason }; + return { + answer, + reasoning, + finish_reason, + inputTokens: usage.prompt_tokens, + outputTokens: usage.completion_tokens + }; } else { const finish_reason = aiResponse.choices?.[0]?.finish_reason as CompletionFinishReason; const content = aiResponse.choices?.[0]?.message?.content || ''; + // @ts-ignore const reasoningContent: string = aiResponse.choices?.[0]?.message?.reasoning_content || ''; + const usage = aiResponse.usage; // API already parse reasoning content if (reasoningContent || !aiChatReasoning) { return { answer: content, reasoning: reasoningContent, - finish_reason + finish_reason, + inputTokens: usage?.prompt_tokens, + outputTokens: usage?.completion_tokens }; } @@ -285,7 +307,9 @@ export const runToolWithPromptCall = async ( return { answer, reasoning: think, - finish_reason + finish_reason, + inputTokens: usage?.prompt_tokens, + outputTokens: usage?.completion_tokens }; } })(); @@ -336,8 +360,8 @@ export const runToolWithPromptCall = async ( reasoning_text: undefined }); - const inputTokens = await countGptMessagesTokens(requestMessages); - const outputTokens = await countGptMessagesTokens([gptAssistantResponse]); + inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages)); + outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse])); // concat tool assistant const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType; @@ -423,8 +447,8 @@ export const runToolWithPromptCall = async ( }; // Only toolCall tokens are counted here, Tool response tokens count towards the next reply - const inputTokens = await countGptMessagesTokens(requestMessages); - const outputTokens = await countGptMessagesTokens([assistantToolMsgParams]); + inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages)); + outputTokens = outputTokens || (await countGptMessagesTokens([assistantToolMsgParams])); /* ... 
@@ -559,9 +583,12 @@ async function streamResponse({ let answer = ''; let reasoning = ''; let finish_reason: CompletionFinishReason = null; + let usage = getLLMDefaultUsage(); + const { parsePart, getStartTagBuffer } = parseReasoningStreamContent(); for await (const part of stream) { + usage = part.usage || usage; if (res.closed) { stream.controller?.abort(); finish_reason = 'close'; @@ -629,7 +656,7 @@ async function streamResponse({ } } - return { answer, reasoning, finish_reason }; + return { answer, reasoning, finish_reason, usage }; } const parseAnswer = ( diff --git a/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts b/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts index 1b7bfe5b5..2b61bd371 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts @@ -14,7 +14,10 @@ import { NextApiResponse } from 'next'; import { responseWriteController } from '../../../../../common/response'; import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants'; import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils'; -import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; +import { + ChatCompletionRequestMessageRoleEnum, + getLLMDefaultUsage +} from '@fastgpt/global/core/ai/constants'; import { dispatchWorkFlow } from '../../index'; import { DispatchToolModuleProps, RunToolResponse, ToolNodeItemType } from './type.d'; import json5 from 'json5'; @@ -301,19 +304,38 @@ export const runToolWithToolChoice = async ( } }); - const { answer, toolCalls, finish_reason } = await (async () => { - if (res && isStreamResponse) { - return streamResponse({ + let { answer, toolCalls, finish_reason, inputTokens, outputTokens } = await (async () => { + if (isStreamResponse) { + if (!res || res.closed) { + return { + answer: '', + toolCalls: [], + finish_reason: 'close' as const, + inputTokens: 0, + outputTokens: 0 + }; + } + + const result = await streamResponse({ res, workflowStreamResponse, toolNodes, stream: aiResponse }); + + return { + answer: result.answer, + toolCalls: result.toolCalls, + finish_reason: result.finish_reason, + inputTokens: result.usage.prompt_tokens, + outputTokens: result.usage.completion_tokens + }; } else { const result = aiResponse as ChatCompletion; const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason; const calls = result.choices?.[0]?.message?.tool_calls || []; const answer = result.choices?.[0]?.message?.content || ''; + const usage = result.usage; // 加上name和avatar const toolCalls = calls.map((tool) => { @@ -353,7 +375,9 @@ export const runToolWithToolChoice = async ( return { answer, toolCalls: toolCalls, - finish_reason + finish_reason, + inputTokens: usage?.prompt_tokens, + outputTokens: usage?.completion_tokens }; } })(); @@ -447,7 +471,7 @@ export const runToolWithToolChoice = async ( ? 
response.dispatchFlowResponse.concat(flatToolsResponseData) : flatToolsResponseData; - if (toolCalls.length > 0 && !res?.closed) { + if (toolCalls.length > 0) { // Run the tool, combine its results, and perform another round of AI calls const assistantToolMsgParams: ChatCompletionAssistantMessageParam[] = [ ...(answer @@ -475,8 +499,8 @@ export const runToolWithToolChoice = async ( ] as ChatCompletionMessageParam[]; // Only toolCall tokens are counted here, Tool response tokens count towards the next reply - const inputTokens = await countGptMessagesTokens(requestMessages, tools); - const outputTokens = await countGptMessagesTokens(assistantToolMsgParams); + inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools)); + outputTokens = outputTokens || (await countGptMessagesTokens(assistantToolMsgParams)); /* ... @@ -580,8 +604,8 @@ export const runToolWithToolChoice = async ( content: answer }; const completeMessages = filterMessages.concat(gptAssistantResponse); - const inputTokens = await countGptMessagesTokens(requestMessages, tools); - const outputTokens = await countGptMessagesTokens([gptAssistantResponse]); + inputTokens = inputTokens || (await countGptMessagesTokens(requestMessages, tools)); + outputTokens = outputTokens || (await countGptMessagesTokens([gptAssistantResponse])); // concat tool assistant const toolNodeAssistant = GPTMessages2Chats([gptAssistantResponse])[0] as AIChatItemType; @@ -619,8 +643,10 @@ async function streamResponse({ let callingTool: { name: string; arguments: string } | null = null; let toolCalls: ChatCompletionMessageToolCall[] = []; let finishReason: CompletionFinishReason = null; + let usage = getLLMDefaultUsage(); for await (const part of stream) { + usage = part.usage || usage; if (res.closed) { stream.controller?.abort(); finishReason = 'close'; @@ -644,6 +670,7 @@ async function streamResponse({ }); } if (responseChoice?.tool_calls?.[0]) { + // @ts-ignore const toolCall: ChatCompletionMessageToolCall = responseChoice.tool_calls[0]; // In a stream response, only one tool is returned at a time. 
If have id, description is executing a tool if (toolCall.id || callingTool) { @@ -715,5 +742,5 @@ async function streamResponse({ } } - return { answer: textAnswer, toolCalls, finish_reason: finishReason }; + return { answer: textAnswer, toolCalls, finish_reason: finishReason, usage }; } diff --git a/packages/service/core/workflow/dispatch/agent/runTool/type.d.ts b/packages/service/core/workflow/dispatch/agent/runTool/type.d.ts index 52ec7c4bc..d55a0c98d 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/type.d.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/type.d.ts @@ -36,7 +36,6 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{ export type RunToolResponse = { dispatchFlowResponse: DispatchFlowResponse[]; - toolNodeTokens?: number; // deprecated toolNodeInputTokens: number; toolNodeOutputTokens: number; completeMessages?: ChatCompletionMessageParam[]; diff --git a/packages/service/core/workflow/dispatch/chat/oneapi.ts b/packages/service/core/workflow/dispatch/chat/oneapi.ts index a96356ced..d8d9561db 100644 --- a/packages/service/core/workflow/dispatch/chat/oneapi.ts +++ b/packages/service/core/workflow/dispatch/chat/oneapi.ts @@ -9,11 +9,15 @@ import { createChatCompletion } from '../../../ai/config'; import type { ChatCompletionMessageParam, CompletionFinishReason, + CompletionUsage, StreamChatType } from '@fastgpt/global/core/ai/type.d'; import { formatModelChars2Points } from '../../../../support/wallet/usage/utils'; import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d'; -import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; +import { + ChatCompletionRequestMessageRoleEnum, + getLLMDefaultUsage +} from '@fastgpt/global/core/ai/constants'; import type { ChatDispatchProps, DispatchNodeResultType @@ -199,17 +203,19 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise { + let { answerText, reasoningText, finish_reason, inputTokens, outputTokens } = await (async () => { if (isStreamResponse) { - if (!res) { + if (!res || res.closed) { return { answerText: '', reasoningText: '', - finish_reason: 'close' as const + finish_reason: 'close' as const, + inputTokens: 0, + outputTokens: 0 }; } // sse response - const { answer, reasoning, finish_reason } = await streamResponse({ + const { answer, reasoning, finish_reason, usage } = await streamResponse({ res, stream: response, aiChatReasoning, @@ -221,10 +227,13 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise { const content = response.choices?.[0]?.message?.content || ''; @@ -269,7 +278,9 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise - - - - - - + {activeModule?.tokens && ( + + )} + {(!!activeModule?.inputTokens || !!activeModule?.outputTokens) && ( + + )} + {(!!activeModule?.toolCallInputTokens || !!activeModule?.toolCallOutputTokens) && ( + + )}
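
Note on the recurring pattern in this patch: every call site now prefers the provider-reported usage (prompt_tokens / completion_tokens) and only falls back to local GPT-3.5-style token estimation when the API omits usage. The sketch below is not part of the diff; it restates that fallback with simplified stand-in types and estimators (the real code uses CompletionUsage from @fastgpt/global/core/ai/type and the countGptMessagesTokens / countPromptTokens tiktoken helpers).

// Minimal sketch, assuming simplified stand-ins for the real helpers.
type CompletionUsage = {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
};

type ChatMessage = { role: 'system' | 'user' | 'assistant'; content: string };

// Rough stand-in estimators: the production code counts tokens with tiktoken
// (GPT-3.5 encoder); ~4 characters per token here is only an approximation.
const estimateMessagesTokens = async (messages: ChatMessage[]): Promise<number> =>
  messages.reduce((sum, m) => sum + Math.ceil(m.content.length / 4), 0);
const estimateTextTokens = async (text: string): Promise<number> =>
  Math.ceil(text.length / 4);

export const resolveTokenCounts = async ({
  usage,
  requestMessages,
  answer
}: {
  usage?: CompletionUsage;
  requestMessages: ChatMessage[];
  answer: string;
}): Promise<{ inputTokens: number; outputTokens: number }> => {
  // API usage wins when present and non-zero; `||` also covers the
  // getLLMDefaultUsage() case where every field is initialised to 0.
  const inputTokens =
    usage?.prompt_tokens || (await estimateMessagesTokens(requestMessages));
  const outputTokens =
    usage?.completion_tokens || (await estimateTextTokens(answer));

  return { inputTokens, outputTokens };
};

In the streaming paths the patch accumulates part.usage across chunks (starting from getLLMDefaultUsage()), so the same zero-valued fallback applies when a provider never emits usage in the stream.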