import type {
  ChatCompletion,
  ChatCompletionCreateParamsNonStreaming,
  ChatCompletionCreateParamsStreaming,
  ChatCompletionMessageParam,
  ChatCompletionMessageToolCall,
  CompletionFinishReason,
  CompletionUsage,
  OpenAI,
  StreamChatType,
  UnStreamChatType
} from '@fastgpt/global/core/ai/type';
import {
  computedMaxToken,
  computedTemperature,
  parseLLMStreamResponse,
  parseReasoningContent
} from '../utils';
import { getLLMSupportParams, removeDatasetCiteText } from '@fastgpt/global/core/ai/llm/utils';
import { getAIApi } from '../config';
import type { OpenaiAccountType } from '@fastgpt/global/support/user/team/type';
import { customNanoid, getNanoid } from '@fastgpt/global/common/string/tools';
import { parsePromptToolCall, promptToolCallMessageRewrite } from './promptCall';
import { getLLMModel } from '../model';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
import { loadRequestMessages } from './utils';
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.schema';
import { i18nT } from '../../../../web/i18n/utils';
import { getErrText } from '@fastgpt/global/common/error/utils';
import json5 from 'json5';
import { getLogger, LogCategories } from '../../../common/logger';
import { saveLLMRequestRecord } from '../record/controller';

const getRequestId = () => {
  return customNanoid('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_-', 16);
};

const logger = getLogger(LogCategories.MODULE.AI.LLM);

export type ResponseEvents = {
  onStreaming?: (e: { text: string }) => void;
  onReasoning?: (e: { text: string }) => void;
  onToolCall?: (e: { call: ChatCompletionMessageToolCall }) => void;
  onToolParam?: (e: { tool: ChatCompletionMessageToolCall; params: string }) => void;
};

export type CreateLLMResponseProps = {
  throwError?: boolean;
  userKey?: OpenaiAccountType;
  body: LLMRequestBodyType;
  isAborted?: () => boolean | undefined | null;
  custonHeaders?: Record<string, string>;
  maxContinuations?: number;
} & ResponseEvents;

type LLMResponse = {
  requestId: string; // LLM request trace ID
  error?: any;
  isStreamResponse: boolean;
  answerText: string;
  reasoningText: string;
  toolCalls?: ChatCompletionMessageToolCall[];
  finish_reason: CompletionFinishReason;
  responseEmptyTip?: string;
  usage: {
    inputTokens: number;
    outputTokens: number;
  };
  requestMessages: ChatCompletionMessageParam[];
  assistantMessage?: ChatCompletionMessageParam;
  completeMessages: ChatCompletionMessageParam[];
};

/*
  Low-level wrapper around LLM calls.
  It shields callers from stream vs. non-stream handling, and from the toolChoice
  vs. promptTool calling modes. Whichever mode is used, tool calls are stored in
  the toolChoice format; promptTool mode rewrites that structure into dedicated
  messages before the request is made.
*/
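/*
  Example usage (a minimal sketch; the model name, prompt and callback body are
  illustrative, not taken from any real configuration):

  const { answerText, usage, completeMessages } = await createLLMResponse({
    body: {
      model: 'gpt-4o-mini',
      stream: true,
      messages: [{ role: 'user', content: 'Hello' }]
    },
    onStreaming: ({ text }) => process.stdout.write(text)
  });
*/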
export const createLLMResponse = async (args: CreateLLMResponseProps): Promise<LLMResponse> => {
  // Generate a unique request trace ID
  const requestId = getRequestId();

  const { throwError = true, body, custonHeaders, userKey, maxContinuations = 1 } = args;
  const { messages, useVision, requestOrigin, tools, toolCallMode } = body;

  // Load and normalize the request messages
  const requestMessages = await loadRequestMessages({
    messages,
    useVision,
    origin: requestOrigin
  });

  // In prompt tool-call mode, rewrite the messages so the tools are described in the prompt
  const rewriteMessages = (() => {
    if (tools?.length && toolCallMode === 'prompt') {
      return promptToolCallMessageRewrite(requestMessages, tools);
    }
    return requestMessages;
  })();

  const { requestBody, modelData } = await llmCompletionsBodyFormat({
    ...body,
    messages: rewriteMessages
  });

  // Initial request; accumulate results and retry while finish_reason is 'length'
  let accumulatedAnswerText = '';
  let accumulatedReasoningText = '';
  let accumulatedToolCalls: ChatCompletionMessageToolCall[] | undefined;
  let currentFinishReason: CompletionFinishReason = 'stop';
  let accumulatedUsage = {
    prompt_tokens: 0,
    completion_tokens: 0,
    total_tokens: 0
  };
  let currentError: any = undefined;
  let currentMessages = [...requestBody.messages];
  let continuationCount = 0;
  let isStreamResponse = false;

  try {
    while (continuationCount < maxContinuations) {
      // console.debug(
      //   'LLM Request Body:',
      //   JSON.stringify({ ...requestBody, messages: currentMessages }, null, 2)
      // );
      const { response, isStreamResponse: currentIsStreamResponse } = await createChatCompletion({
        body: {
          ...requestBody,
          messages: currentMessages
        },
        modelData,
        userKey,
        options: {
          headers: {
            Accept: 'application/json, text/plain, */*',
            ...custonHeaders
          }
        }
      });

      // Save isStreamResponse from the first request
      if (continuationCount === 0) {
        isStreamResponse = currentIsStreamResponse;
      }

      let { answerText, reasoningText, toolCalls, finish_reason, usage, error } =
        await (async () => {
          if (currentIsStreamResponse) {
            return createStreamResponse({
              response,
              body,
              isAborted: args.isAborted,
              onStreaming: args.onStreaming,
              onReasoning: args.onReasoning,
              onToolCall: args.onToolCall,
              onToolParam: args.onToolParam
            });
          } else {
            return createCompleteResponse({
              response,
              body,
              onStreaming: args.onStreaming,
              onReasoning: args.onReasoning,
              onToolCall: args.onToolCall
            });
          }
        })();

      // Format toolCalls
      // 1. Auto-complete empty arguments: some models cannot handle "" as arguments
      toolCalls = toolCalls?.map((tool) => ({
        ...tool,
        function: {
          ...tool.function,
          arguments: tool.function.arguments || '{}'
        }
      }));

      // Accumulate results
      accumulatedAnswerText += answerText;
      accumulatedReasoningText += reasoningText;
      if (toolCalls?.length) {
        accumulatedToolCalls = [...(accumulatedToolCalls || []), ...toolCalls];
      }
      currentFinishReason = finish_reason;
      currentError = error;

      // Accumulate usage
      if (usage) {
        accumulatedUsage.prompt_tokens += usage.prompt_tokens || 0;
        accumulatedUsage.completion_tokens += usage.completion_tokens || 0;
        accumulatedUsage.total_tokens += usage.total_tokens || 0;
      }

      // Check whether we need to continue
      // TODO: handle output that exceeds the model's output limit
      if (finish_reason === 'length' && !error) {
        // Append the accumulated assistant message and a user continuation message
        currentMessages = currentMessages.slice(0, requestBody.messages.length);
        currentMessages = [
          ...currentMessages,
          ...(accumulatedToolCalls
            ? [
                {
                  role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
                  tool_calls: accumulatedToolCalls
                }
              ]
            : []),
          {
            role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
            ...(accumulatedAnswerText && { content: accumulatedAnswerText }),
            ...(accumulatedReasoningText && { reasoning_content: accumulatedReasoningText })
          },
          {
            role: ChatCompletionRequestMessageRoleEnum.User as 'user',
            content: '[继续输出]' // i.e. "continue output"
          }
        ];

        logger.debug(`Continue LLM response due to length limit`, {
          continuationCount,
          completionTokens: usage?.completion_tokens
        });
        continuationCount++;
      } else {
        // Stop condition reached
        break;
      }
    }
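    /*
      Shape of currentMessages after one 'length' continuation (illustrative;
      the actual content comes from the accumulated state above):

      [
        ...requestBody.messages,
        { role: 'assistant', content: '<partial answer so far>' },
        { role: 'user', content: '[继续输出]' }
      ]
    */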

    // Use the accumulated results
    let { answerText, reasoningText, toolCalls, finish_reason, usage, error } = {
      answerText: accumulatedAnswerText,
      reasoningText: accumulatedReasoningText,
      toolCalls: accumulatedToolCalls,
      finish_reason: currentFinishReason,
      usage: accumulatedUsage,
      error: currentError
    };

    const assistantMessage: ChatCompletionMessageParam = {
      role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
      ...(answerText && { content: answerText }),
      ...(reasoningText && { reasoning_content: reasoningText }),
      ...(toolCalls?.length && { tool_calls: toolCalls })
    };

    // Usage count (fall back to tiktoken counting when the API omits usage)
    const inputTokens =
      usage?.prompt_tokens ||
      (await countGptMessagesTokens(requestBody.messages, requestBody.tools));
    const outputTokens =
      usage?.completion_tokens || (await countGptMessagesTokens([assistantMessage]));

    // Persist the LLM request trace record asynchronously
    saveLLMRequestRecord({
      requestId,
      body: requestBody,
      response: {
        ...(answerText && { answerText }),
        ...(reasoningText && { reasoningText }),
        ...(toolCalls?.length && { toolCalls }),
        finish_reason,
        usage: {
          inputTokens,
          outputTokens
        },
        error
      }
    });

    if (error) {
      finish_reason = 'error';
      if (throwError) {
        throw error;
      }
    }

    const getEmptyResponseTip = () => {
      if (userKey?.baseUrl) {
        logger.warn(`User LLM response empty`, {
          baseUrl: userKey?.baseUrl,
          requestBody,
          finish_reason
        });
        return `您的 OpenAI key 没有响应: ${JSON.stringify(body)}`;
      } else {
        logger.error(`LLM response empty`, {
          message: '',
          data: requestBody,
          finish_reason
        });
      }
      return i18nT('chat:LLM_model_response_empty');
    };

    const isNotResponse =
      !answerText &&
      !reasoningText &&
      !toolCalls?.length &&
      !error &&
      (finish_reason === 'stop' || !finish_reason);
    const responseEmptyTip = isNotResponse ? getEmptyResponseTip() : undefined;

    return {
      error,
      isStreamResponse,
      responseEmptyTip,
      answerText,
      reasoningText,
      toolCalls,
      finish_reason,
      usage: {
        inputTokens: error ? 0 : inputTokens,
        outputTokens: error ? 0 : outputTokens
      },
      requestId, // return the request trace ID
      requestMessages,
      assistantMessage,
      completeMessages: [...requestMessages, assistantMessage]
    };
  } catch (error) {
    // Persist the LLM request trace record asynchronously
    saveLLMRequestRecord({
      requestId,
      body: requestBody,
      response: {
        error: getErrText(error)
      }
    });

    if (throwError) {
      throw error;
    }

    return {
      error,
      requestId, // return the request trace ID
      isStreamResponse: false,
      answerText: '',
      reasoningText: '',
      finish_reason: 'error',
      usage: {
        inputTokens: 0,
        outputTokens: 0
      },
      requestMessages: requestBody.messages,
      completeMessages: [...requestBody.messages]
    };
  }
};
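/*
  Event order emitted by the stream parser below (a sketch based on the
  toolChoice branch; the tool name and argument fragments are illustrative):

  onToolCall({
    call: { id: 'abc123', type: 'function', function: { name: 'getWeather', arguments: '' } }
  });
  onToolParam({ tool, params: '{"city":' });
  onToolParam({ tool, params: '"Paris"}' });
*/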

type CompleteParams = Pick<CreateLLMResponseProps, 'body'> & ResponseEvents;
type CompleteResponse = Pick<
  LLMResponse,
  'answerText' | 'reasoningText' | 'toolCalls' | 'finish_reason'
> & {
  usage?: CompletionUsage;
  error?: any;
};

export const createStreamResponse = async ({
  body,
  response,
  isAborted,
  onStreaming,
  onReasoning,
  onToolCall,
  onToolParam
}: CompleteParams & {
  response: StreamChatType;
  isAborted?: CreateLLMResponseProps['isAborted'];
}): Promise<CompleteResponse> => {
  const { retainDatasetCite = true, tools, toolCallMode = 'toolChoice', model } = body;
  const modelData = getLLMModel(model);

  const { parsePart, getResponseData, updateFinishReason, updateError } = parseLLMStreamResponse();

  if (tools?.length) {
    if (toolCallMode === 'toolChoice') {
      let callingTool: ChatCompletionMessageToolCall['function'] | null = null;
      const toolCalls: ChatCompletionMessageToolCall[] = [];

      try {
        for await (const part of response) {
          if (isAborted?.()) {
            response.controller?.abort();
            updateFinishReason('close');
            break;
          }

          const { reasoningContent, responseContent } = parsePart({
            part,
            parseThinkTag: modelData.reasoning,
            retainDatasetCite
          });

          if (reasoningContent) {
            onReasoning?.({ text: reasoningContent });
          }
          if (responseContent) {
            onStreaming?.({ text: responseContent });
          }

          const responseChoice = part.choices?.[0]?.delta;

          // Parse tool calls
          if (responseChoice?.tool_calls?.length) {
            responseChoice.tool_calls.forEach((toolCall, i) => {
              const index = toolCall.index ?? i;

              const hasNewTool = toolCall?.function?.name || callingTool;
              if (hasNewTool) {
                // Start a new tool call
                if (toolCall?.function?.name) {
                  callingTool = {
                    name: toolCall.function?.name || '',
                    arguments: toolCall.function?.arguments || ''
                  };
                } else if (callingTool) {
                  // Continue the call (perhaps the previous function name was incomplete)
                  callingTool.name += toolCall.function?.name || '';
                  callingTool.arguments += toolCall.function?.arguments || '';
                }

                // The tool name is complete and known: add it to the list
                if (tools.find((item) => item.function.name === callingTool!.name)) {
                  const call: ChatCompletionMessageToolCall = {
                    id: toolCall.id || getNanoid(6),
                    type: 'function',
                    function: callingTool!
                  };
                  toolCalls[index] = call;
                  onToolCall?.({ call });
                  callingTool = null;
                }
              } else {
                /* Append arg to the current tool's arguments */
                const arg: string = toolCall?.function?.arguments ?? '';
                const currentTool = toolCalls[index];
                if (currentTool && arg) {
                  currentTool.function.arguments += arg;
                  onToolParam?.({ tool: currentTool, params: arg });
                }
              }
            });
          }
        }
      } catch (error: any) {
        updateError(error?.error || error);
      }

      const { reasoningContent, content, finish_reason, usage, error } = getResponseData();

      return {
        error,
        answerText: content,
        reasoningText: reasoningContent,
        finish_reason,
        usage,
        toolCalls: toolCalls.filter((call) => !!call)
      };
    } else {
      let startResponseWrite = false;
      let answer = '';
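      /*
        Prompt tool-call protocol handled below: the model is expected to prefix
        its reply with "0:" (or the fullwidth "0：") for a plain answer and "1:"
        for a tool call. An illustrative exchange (the payload format after "1:"
        is whatever parsePromptToolCall accepts; it is not specified here):

          "0: The weather is sunny"  -> streamed to onStreaming as the answer
          "1: <tool call payload>"   -> buffered, then parsed into toolCalls
      */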

      try {
        for await (const part of response) {
          if (isAborted?.()) {
            response.controller?.abort();
            updateFinishReason('close');
            break;
          }

          const { reasoningContent, content, responseContent } = parsePart({
            part,
            parseThinkTag: modelData.reasoning,
            retainDatasetCite
          });
          answer += content;

          if (reasoningContent) {
            onReasoning?.({ text: reasoningContent });
          }

          if (content) {
            if (startResponseWrite) {
              if (responseContent) {
                onStreaming?.({ text: responseContent });
              }
            } else if (answer.length >= 3) {
              answer = answer.trimStart();
              // "0:" prefix: a plain answer, not a tool call
              if (/0(:|：)/.test(answer)) {
                startResponseWrite = true;
                // Find the first ":" index
                const firstIndex =
                  answer.indexOf('0:') !== -1 ? answer.indexOf('0:') : answer.indexOf('0：');
                answer = answer.substring(firstIndex + 2).trim();
                onStreaming?.({ text: answer });
              }
              // "1:" prefix: a tool call, do not stream it as an answer
              else if (/1(:|：)/.test(answer)) {
              }
              // No 0/1 prefix: start streaming the response
              else {
                startResponseWrite = true;
                onStreaming?.({ text: answer });
              }
            }
          }
        }
      } catch (error: any) {
        updateError(error?.error || error);
      }

      const { reasoningContent, content, finish_reason, usage, error } = getResponseData();
      const { answer: llmAnswer, streamAnswer, toolCalls } = parsePromptToolCall(content);

      if (streamAnswer) {
        onStreaming?.({ text: streamAnswer });
      }
      toolCalls?.forEach((call) => {
        onToolCall?.({ call });
      });

      return {
        error,
        answerText: llmAnswer,
        reasoningText: reasoningContent,
        finish_reason,
        usage,
        toolCalls
      };
    }
  } else {
    // No tools: plain streaming
    try {
      for await (const part of response) {
        if (isAborted?.()) {
          response.controller?.abort();
          updateFinishReason('close');
          break;
        }

        const { reasoningContent, responseContent } = parsePart({
          part,
          parseThinkTag: modelData.reasoning,
          retainDatasetCite
        });

        if (reasoningContent) {
          onReasoning?.({ text: reasoningContent });
        }
        if (responseContent) {
          onStreaming?.({ text: responseContent });
        }
      }
    } catch (error: any) {
      updateError(error?.error || error);
    }

    const { reasoningContent, content, finish_reason, usage, error } = getResponseData();

    return {
      error,
      answerText: content,
      reasoningText: reasoningContent,
      finish_reason,
      usage
    };
  }
};
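/*
  Reasoning extraction for non-streaming responses: when the provider does not
  return a dedicated reasoning_content field, parseReasoningContent is used to
  split inline think tags. A sketch with an assumed example payload:

    "<think>check the units first</think>The answer is 42."
    -> reasoningContent: "check the units first", content: "The answer is 42."
*/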

export const createCompleteResponse = async ({
  body,
  response,
  onStreaming,
  onReasoning,
  onToolCall
}: CompleteParams & { response: ChatCompletion }): Promise<CompleteResponse> => {
  const { tools, toolCallMode = 'toolChoice', retainDatasetCite = true } = body;
  const modelData = getLLMModel(body.model);

  const finish_reason = response.choices?.[0]?.finish_reason as CompletionFinishReason;
  const usage = response.usage;

  // Content and think-tag parse
  const { content, reasoningContent } = (() => {
    const content = response.choices?.[0]?.message?.content || '';
    const reasoningContent: string =
      (response.choices?.[0]?.message as any)?.reasoning_content || '';

    // The API already parsed the reasoning content
    if (reasoningContent || !modelData.reasoning) {
      return { content, reasoningContent };
    }

    const [think, answer] = parseReasoningContent(content);
    return { content: answer, reasoningContent: think };
  })();

  const formatReasonContent = removeDatasetCiteText(reasoningContent, retainDatasetCite);
  let formatContent = removeDatasetCiteText(content, retainDatasetCite);

  // Tool parse
  const { toolCalls } = (() => {
    if (tools?.length) {
      if (toolCallMode === 'toolChoice') {
        return { toolCalls: response.choices?.[0]?.message?.tool_calls || [] };
      }

      // Prompt call
      const { answer, toolCalls } = parsePromptToolCall(formatContent);
      formatContent = answer;
      return { toolCalls };
    }
    return { toolCalls: undefined };
  })();

  // Event response
  if (formatReasonContent) {
    onReasoning?.({ text: formatReasonContent });
  }
  if (formatContent) {
    onStreaming?.({ text: formatContent });
  }
  if (toolCalls?.length && onToolCall) {
    toolCalls.forEach((call) => {
      onToolCall({ call });
    });
  }

  return {
    error: response.error,
    reasoningText: formatReasonContent,
    answerText: formatContent,
    toolCalls,
    finish_reason,
    usage
  };
};

type CompletionsBodyType =
  | ChatCompletionCreateParamsNonStreaming
  | ChatCompletionCreateParamsStreaming;
type InferCompletionsBody<T> = T extends { stream: true }
  ? ChatCompletionCreateParamsStreaming
  : T extends { stream: false }
    ? ChatCompletionCreateParamsNonStreaming
    : ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;

type LLMRequestBodyType = Omit<
  CompletionsBodyType,
  'model' | 'stop' | 'response_format' | 'messages'
> & {
  model: string | LLMModelItemType;
  stop?: string;
  response_format?: {
    type?: string;
    json_schema?: string;
  };
  messages: ChatCompletionMessageParam[];

  // Custom fields
  retainDatasetCite?: boolean;
  toolCallMode?: 'toolChoice' | 'prompt';
  useVision?: boolean;
  requestOrigin?: string;
};
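/*
  Example of the formatting performed below (values are illustrative):

    { model: 'gpt-4o-mini', temperature: 0.7, stop: '###|END', max_tokens: 4000 }

  roughly becomes

    {
      model: 'gpt-4o-mini',          // resolved via getLLMModel
      temperature: <rescaled by computedTemperature for the target model>,
      stop: ['###', 'END'],          // "|"-separated string -> array
      max_tokens: <clamped by computedMaxToken to the model limit>
    }

  after which params the model does not support (per getLLMSupportParams) are
  removed, and fieldMap/defaultConfig from the model config are applied.
*/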

const llmCompletionsBodyFormat = async <T extends CompletionsBodyType>({
  retainDatasetCite,
  useVision,
  requestOrigin,
  tools,
  tool_choice,
  parallel_tool_calls,
  toolCallMode,
  ...body
}: LLMRequestBodyType): Promise<{
  requestBody: InferCompletionsBody<T>;
  modelData: LLMModelItemType;
}> => {
  const modelData = getLLMModel(body.model);
  if (!modelData) {
    return {
      requestBody: body as unknown as InferCompletionsBody<T>,
      modelData
    };
  }

  const response_format = (() => {
    if (!body.response_format?.type) return undefined;

    if (body.response_format.type === 'json_schema') {
      try {
        return {
          type: 'json_schema',
          json_schema: json5.parse(body.response_format?.json_schema as unknown as string)
        };
      } catch (error) {
        throw new Error('Json schema error');
      }
    }
    if (body.response_format.type) {
      return { type: body.response_format.type };
    }
    return undefined;
  })();

  const stop = body.stop ?? undefined;
  const maxTokens = computedMaxToken({
    model: modelData,
    maxToken: body.max_tokens || undefined
  });
  // Stop words arrive as a "|"-separated string; split them into an array
  const formatStop = stop?.split('|').filter((item) => !!item.trim());

  let requestBody = {
    ...body,
    max_tokens: maxTokens,
    model: modelData.model,
    temperature:
      typeof body.temperature === 'number'
        ? computedTemperature({
            model: modelData,
            temperature: body.temperature
          })
        : undefined,
    response_format,
    stop: formatStop?.length ? formatStop : undefined,
    ...(toolCallMode === 'toolChoice' &&
      tools?.length && {
        tools,
        tool_choice,
        parallel_tool_calls
      })
  } as T;

  // Filter out undefined/null values
  requestBody = Object.fromEntries(
    Object.entries(requestBody).filter(([_, value]) => value !== null && value !== undefined)
  ) as T;

  // Remove params the model does not support
  const supportParams = getLLMSupportParams(modelData);
  if (!supportParams.temperature) {
    delete requestBody.temperature;
  }
  if (!supportParams.topP) {
    delete requestBody.top_p;
  }
  if (!supportParams.stop) {
    delete requestBody.stop;
  }
  if (!supportParams.responseFormat) {
    delete requestBody.response_format;
  }

  // Field map: rename request fields per the model config
  if (modelData.fieldMap) {
    Object.entries(modelData.fieldMap).forEach(([sourceKey, targetKey]) => {
      // @ts-ignore
      requestBody[targetKey] = body[sourceKey];
      // @ts-ignore
      delete requestBody[sourceKey];
    });
  }

  requestBody = {
    ...requestBody,
    ...modelData?.defaultConfig
  };

  return {
    requestBody: requestBody as unknown as InferCompletionsBody<T>,
    modelData
  };
};

const createChatCompletion = async ({
  modelData,
  body,
  userKey,
  timeout,
  options
}: {
  modelData: LLMModelItemType;
  body: ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;
  userKey?: OpenaiAccountType;
  timeout?: number;
  options?: OpenAI.RequestOptions;
}): Promise<
  | {
      response: StreamChatType;
      isStreamResponse: true;
    }
  | {
      response: UnStreamChatType;
      isStreamResponse: false;
    }
> => {
  try {
    if (!modelData) {
      return Promise.reject(`${body.model} not found`);
    }
    body.model = modelData.model;

    const formatTimeout = timeout ? timeout : 600000;
    const ai = getAIApi({
      userKey,
      timeout: formatTimeout
    });

    logger.debug('Start create chat completion', { model: body.model });

    const response = await ai.chat.completions.create(body, {
      ...options,
      ...(modelData.requestUrl && !userKey ? { path: modelData.requestUrl } : {}),
      headers: {
        ...options?.headers,
        ...(modelData.requestAuth && !userKey
          ? { Authorization: `Bearer ${modelData.requestAuth}` }
          : {})
      }
    });

    // The SDK returns a Stream object (exposing iterator/controller) for
    // stream requests and a plain completion object otherwise
    const isStreamResponse =
      typeof response === 'object' &&
      response !== null &&
      ('iterator' in response || 'controller' in response);

    if (isStreamResponse) {
      return {
        response,
        isStreamResponse: true
      };
    }

    return {
      response,
      isStreamResponse: false
    };
  } catch (error) {
    if (userKey?.baseUrl) {
      logger.warn('User AI API error', { baseUrl: userKey?.baseUrl, request: body, error });
      return Promise.reject(`您的 OpenAI key 出错了: ${getErrText(error)}`);
    } else {
      logger.error('LLM response error', { request: body, error });
    }
    return Promise.reject(error);
  }
};