Files
FastGPT/packages/service/core/ai/llm/request.ts
import type {
ChatCompletion,
ChatCompletionCreateParamsNonStreaming,
ChatCompletionCreateParamsStreaming,
ChatCompletionMessageParam,
ChatCompletionMessageToolCall,
CompletionFinishReason,
CompletionUsage,
OpenAI,
StreamChatType,
UnStreamChatType
} from '@fastgpt/global/core/ai/type';
import {
computedMaxToken,
computedTemperature,
parseLLMStreamResponse,
parseReasoningContent
} from '../utils';
import { getLLMSupportParams, removeDatasetCiteText } from '@fastgpt/global/core/ai/llm/utils';
import { getAIApi } from '../config';
import type { OpenaiAccountType } from '@fastgpt/global/support/user/team/type';
import { customNanoid, getNanoid } from '@fastgpt/global/common/string/tools';
import { parsePromptToolCall, promptToolCallMessageRewrite } from './promptCall';
import { getLLMModel } from '../model';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
import { loadRequestMessages } from './utils';
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.schema';
import { i18nT } from '../../../../web/i18n/utils';
import { getErrText } from '@fastgpt/global/common/error/utils';
import json5 from 'json5';
import { getLogger, LogCategories } from '../../../common/logger';
import { saveLLMRequestRecord } from '../record/controller';
const getRequestId = () => {
return customNanoid('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_-', 16);
};
const logger = getLogger(LogCategories.MODULE.AI.LLM);
export type ResponseEvents = {
onStreaming?: (e: { text: string }) => void;
onReasoning?: (e: { text: string }) => void;
onToolCall?: (e: { call: ChatCompletionMessageToolCall }) => void;
onToolParam?: (e: { tool: ChatCompletionMessageToolCall; params: string }) => void;
};
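// Illustrative sketch only: one way a caller might wire ResponseEvents into an output stream.
// The `write` helper below is hypothetical and not part of FastGPT; the callback shapes match
// the ResponseEvents type above.
//
//   const events: ResponseEvents = {
//     onReasoning: ({ text }) => write({ event: 'reasoning', text }),
//     onStreaming: ({ text }) => write({ event: 'answer', text }),
//     onToolCall: ({ call }) => write({ event: 'toolCall', name: call.function.name }),
//     onToolParam: ({ tool, params }) => write({ event: 'toolParams', id: tool.id, delta: params })
//   };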
export type CreateLLMResponseProps<T extends CompletionsBodyType = CompletionsBodyType> = {
throwError?: boolean;
userKey?: OpenaiAccountType;
body: LLMRequestBodyType<T>;
isAborted?: () => boolean | undefined | null;
custonHeaders?: Record<string, string>;
maxContinuations?: number;
} & ResponseEvents;
type LLMResponse = {
requestId: string; // LLM request trace ID
error?: any;
isStreamResponse: boolean;
answerText: string;
reasoningText: string;
toolCalls?: ChatCompletionMessageToolCall[];
finish_reason: CompletionFinishReason;
responseEmptyTip?: string;
usage: {
inputTokens: number;
outputTokens: number;
};
requestMessages: ChatCompletionMessageParam[];
assistantMessage?: ChatCompletionMessageParam;
completeMessages: ChatCompletionMessageParam[];
};
/*
Low-level wrapper around LLM calls. It hides stream vs. non-stream handling from callers,
as well as the toolChoice vs. promptTool modes.
Tool calls are always stored in the toolChoice format; in promptTool mode that structure is
rewritten into dedicated messages before the request is made.
*/
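// Minimal usage sketch (illustrative; the model id and messages below are placeholders, not
// values from this repo):
//
//   const { answerText, usage, completeMessages } = await createLLMResponse({
//     body: {
//       model: 'gpt-4o-mini', // a string id or an LLMModelItemType object
//       stream: true,
//       messages: [{ role: 'user', content: 'Hello' }],
//       toolCallMode: 'toolChoice'
//     },
//     onStreaming: ({ text }) => process.stdout.write(text)
//   });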
export const createLLMResponse = async <T extends CompletionsBodyType>(
args: CreateLLMResponseProps<T>
): Promise<LLMResponse> => {
// Generate a unique request trace ID
const requestId = getRequestId();
const { throwError = true, body, custonHeaders, userKey, maxContinuations = 1 } = args;
const { messages, useVision, requestOrigin, tools, toolCallMode } = body;
// Load and preprocess request messages
const requestMessages = await loadRequestMessages({
messages,
useVision,
origin: requestOrigin
});
// Rewrite messages for prompt-based tool calling when needed
const rewriteMessages = (() => {
if (tools?.length && toolCallMode === 'prompt') {
return promptToolCallMessageRewrite(requestMessages, tools);
}
return requestMessages;
})();
const { requestBody, modelData } = await llmCompletionsBodyFormat({
...body,
messages: rewriteMessages
});
// Initial request and accumulate results if finish_reason is 'length'
let accumulatedAnswerText = '';
let accumulatedReasoningText = '';
let accumulatedToolCalls: ChatCompletionMessageToolCall[] | undefined;
let currentFinishReason: CompletionFinishReason = 'stop';
let accumulatedUsage = {
prompt_tokens: 0,
completion_tokens: 0,
total_tokens: 0
};
let currentError: any = undefined;
let currentMessages = [...requestBody.messages];
let continuationCount = 0;
let isStreamResponse = false;
try {
while (continuationCount < maxContinuations) {
// console.debug(
// 'LLM Request Body:',
// JSON.stringify(
// {
// ...requestBody,
// messages: currentMessages
// },
// null,
// 2
// )
// );
const { response, isStreamResponse: currentIsStreamResponse } = await createChatCompletion({
body: {
...requestBody,
messages: currentMessages
},
modelData,
userKey,
options: {
headers: {
Accept: 'application/json, text/plain, */*',
...custonHeaders
}
}
});
// Save isStreamResponse from first request
if (continuationCount === 0) {
isStreamResponse = currentIsStreamResponse;
}
let { answerText, reasoningText, toolCalls, finish_reason, usage, error } =
await (async () => {
if (currentIsStreamResponse) {
return createStreamResponse({
response,
body,
isAborted: args.isAborted,
onStreaming: args.onStreaming,
onReasoning: args.onReasoning,
onToolCall: args.onToolCall,
onToolParam: args.onToolParam
});
} else {
return createCompleteResponse({
response,
body,
onStreaming: args.onStreaming,
onReasoning: args.onReasoning,
onToolCall: args.onToolCall
});
}
})();
// Format toolCalls
// 1. Default empty arguments to '{}', since some models cannot handle an empty arguments string
toolCalls = toolCalls?.map((tool) => ({
...tool,
function: {
...tool.function,
arguments: tool.function.arguments || '{}'
}
}));
// Accumulate results
accumulatedAnswerText += answerText;
accumulatedReasoningText += reasoningText;
if (toolCalls?.length) {
accumulatedToolCalls = [...(accumulatedToolCalls || []), ...toolCalls];
}
currentFinishReason = finish_reason;
currentError = error;
// Accumulate usage
if (usage) {
accumulatedUsage.prompt_tokens += usage.prompt_tokens || 0;
accumulatedUsage.completion_tokens += usage.completion_tokens || 0;
accumulatedUsage.total_tokens += usage.total_tokens || 0;
}
// Check if we need to continue
// TODO: handle output that exceeds the model's output limit
if (finish_reason === 'length' && !error) {
// Append assistant message and user continuation message
currentMessages = currentMessages.slice(0, requestBody.messages.length);
currentMessages = [
...currentMessages,
...(accumulatedToolCalls
? [
{
role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
tool_calls: accumulatedToolCalls
}
]
: []),
{
role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
...(accumulatedAnswerText && { content: accumulatedAnswerText }),
...(accumulatedReasoningText && { reasoning_content: accumulatedReasoningText })
},
{
role: ChatCompletionRequestMessageRoleEnum.User as 'user',
content: '[继续输出]' // i.e. "continue output"
}
];
logger.debug(`Continue LLM response due to length limit`, {
continuationCount,
completionTokens: usage?.completion_tokens
});
continuationCount++;
} else {
// Stop condition reached
break;
}
}
// Use accumulated results
let { answerText, reasoningText, toolCalls, finish_reason, usage, error } = {
answerText: accumulatedAnswerText,
reasoningText: accumulatedReasoningText,
toolCalls: accumulatedToolCalls,
finish_reason: currentFinishReason,
usage: accumulatedUsage,
error: currentError
};
const assistantMessage: ChatCompletionMessageParam = {
role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
...(answerText && { content: answerText }),
...(reasoningText && { reasoning_content: reasoningText }),
...(toolCalls?.length && { tool_calls: toolCalls })
};
// Usage count
const inputTokens =
usage?.prompt_tokens ||
(await countGptMessagesTokens(requestBody.messages, requestBody.tools));
const outputTokens =
usage?.completion_tokens || (await countGptMessagesTokens([assistantMessage]));
// Asynchronously save the LLM request trace record
saveLLMRequestRecord({
requestId,
body: requestBody,
response: {
...(answerText && { answerText }),
...(reasoningText && { reasoningText }),
...(toolCalls?.length && { toolCalls }),
finish_reason,
usage: {
inputTokens,
outputTokens
},
error
}
});
if (error) {
finish_reason = 'error';
if (throwError) {
throw error;
}
}
const getEmptyResponseTip = () => {
if (userKey?.baseUrl) {
logger.warn(`User LLM response empty`, {
baseUrl: userKey?.baseUrl,
requestBody,
finish_reason
});
return `您的 OpenAI key 没有响应: ${JSON.stringify(body)}`;
} else {
logger.error(`LLM response empty`, {
message: '',
data: requestBody,
finish_reason
});
}
return i18nT('chat:LLM_model_response_empty');
};
const isNotResponse =
!answerText &&
!reasoningText &&
!toolCalls?.length &&
!error &&
(finish_reason === 'stop' || !finish_reason);
const responseEmptyTip = isNotResponse ? getEmptyResponseTip() : undefined;
return {
error,
isStreamResponse,
responseEmptyTip,
answerText,
reasoningText,
toolCalls,
finish_reason,
usage: {
inputTokens: error ? 0 : inputTokens,
outputTokens: error ? 0 : outputTokens
},
requestId, // return the request trace ID
requestMessages,
assistantMessage,
completeMessages: [...requestMessages, assistantMessage]
};
} catch (error) {
// Asynchronously save the LLM request trace record
saveLLMRequestRecord({
requestId,
body: requestBody,
response: {
error: getErrText(error)
}
});
if (throwError) {
throw error;
}
return {
error,
requestId, // return the request trace ID
isStreamResponse: false,
answerText: '',
reasoningText: '',
finish_reason: 'error',
usage: {
inputTokens: 0,
outputTokens: 0
},
requestMessages: requestBody.messages,
completeMessages: [...requestBody.messages]
};
}
};
type CompleteParams = Pick<CreateLLMResponseProps<CompletionsBodyType>, 'body'> & ResponseEvents;
type CompleteResponse = Pick<
LLMResponse,
'answerText' | 'reasoningText' | 'toolCalls' | 'finish_reason'
> & {
usage?: CompletionUsage;
error?: any;
};
export const createStreamResponse = async ({
body,
response,
isAborted,
onStreaming,
onReasoning,
onToolCall,
onToolParam
}: CompleteParams & {
response: StreamChatType;
isAborted?: CreateLLMResponseProps['isAborted'];
}): Promise<CompleteResponse> => {
const { retainDatasetCite = true, tools, toolCallMode = 'toolChoice', model } = body;
const modelData = getLLMModel(model);
const { parsePart, getResponseData, updateFinishReason, updateError } = parseLLMStreamResponse();
if (tools?.length) {
if (toolCallMode === 'toolChoice') {
let callingTool: ChatCompletionMessageToolCall['function'] | null = null;
const toolCalls: ChatCompletionMessageToolCall[] = [];
try {
for await (const part of response) {
if (isAborted?.()) {
response.controller?.abort();
updateFinishReason('close');
break;
}
const { reasoningContent, responseContent } = parsePart({
part,
parseThinkTag: modelData.reasoning,
retainDatasetCite
});
if (reasoningContent) {
onReasoning?.({ text: reasoningContent });
}
if (responseContent) {
onStreaming?.({ text: responseContent });
}
const responseChoice = part.choices?.[0]?.delta;
// Parse tool calls
if (responseChoice?.tool_calls?.length) {
responseChoice.tool_calls.forEach((toolCall, i) => {
const index = toolCall.index ?? i;
// A tool call is starting or still being assembled
const hasNewTool = toolCall?.function?.name || callingTool;
if (hasNewTool) {
// Start a new tool call
if (toolCall?.function?.name) {
callingTool = {
name: toolCall.function?.name || '',
arguments: toolCall.function?.arguments || ''
};
} else if (callingTool) {
// Continue the current call (the previous chunk's function name may be incomplete)
callingTool.name += toolCall.function?.name || '';
callingTool.arguments += toolCall.function?.arguments || '';
}
// Once the assembled name matches a known tool, add it to the list
if (tools.find((item) => item.function.name === callingTool!.name)) {
const call: ChatCompletionMessageToolCall = {
id: toolCall.id || getNanoid(6),
type: 'function',
function: callingTool!
};
toolCalls[index] = call;
onToolCall?.({ call });
callingTool = null;
}
} else {
/* Append the argument fragment to the current tool's arguments */
const arg: string = toolCall?.function?.arguments ?? '';
const currentTool = toolCalls[index];
if (currentTool && arg) {
currentTool.function.arguments += arg;
onToolParam?.({ tool: currentTool, params: arg });
}
}
});
}
}
} catch (error: any) {
updateError(error?.error || error);
}
const { reasoningContent, content, finish_reason, usage, error } = getResponseData();
return {
error,
answerText: content,
reasoningText: reasoningContent,
finish_reason,
usage,
toolCalls: toolCalls.filter((call) => !!call)
};
} else {
let startResponseWrite = false;
let answer = '';
try {
for await (const part of response) {
if (isAborted?.()) {
response.controller?.abort();
updateFinishReason('close');
break;
}
const { reasoningContent, content, responseContent } = parsePart({
part,
parseThinkTag: modelData.reasoning,
retainDatasetCite
});
answer += content;
if (reasoningContent) {
onReasoning?.({ text: reasoningContent });
}
if (content) {
if (startResponseWrite) {
if (responseContent) {
onStreaming?.({ text: responseContent });
}
} else if (answer.length >= 3) {
answer = answer.trimStart();
// Starts with "0"/"0:" => plain answer (no tool call)
if (/0(:|)/.test(answer)) {
startResponseWrite = true;
// Strip the "0:" prefix before streaming
const firstIndex =
answer.indexOf('0:') !== -1 ? answer.indexOf('0:') : answer.indexOf('0');
answer = answer.substring(firstIndex + 2).trim();
onStreaming?.({ text: answer });
}
// Starts with "1"/"1:" => tool call, so nothing is streamed here
else if (/1(:|)/.test(answer)) {
}
// No "0"/"1" prefix => start streaming the buffered answer as-is
else {
startResponseWrite = true;
onStreaming?.({ text: answer });
}
}
}
}
} catch (error: any) {
updateError(error?.error || error);
}
const { reasoningContent, content, finish_reason, usage, error } = getResponseData();
const { answer: llmAnswer, streamAnswer, toolCalls } = parsePromptToolCall(content);
if (streamAnswer) {
onStreaming?.({ text: streamAnswer });
}
toolCalls?.forEach((call) => {
onToolCall?.({ call });
});
return {
error,
answerText: llmAnswer,
reasoningText: reasoningContent,
finish_reason,
usage,
toolCalls
};
}
} else {
// No tools: stream plain content and reasoning only
try {
for await (const part of response) {
if (isAborted?.()) {
response.controller?.abort();
updateFinishReason('close');
break;
}
const { reasoningContent, responseContent } = parsePart({
part,
parseThinkTag: modelData.reasoning,
retainDatasetCite
});
if (reasoningContent) {
onReasoning?.({ text: reasoningContent });
}
if (responseContent) {
onStreaming?.({ text: responseContent });
}
}
} catch (error: any) {
updateError(error?.error || error);
}
const { reasoningContent, content, finish_reason, usage, error } = getResponseData();
return {
error,
answerText: content,
reasoningText: reasoningContent,
finish_reason,
usage
};
}
};
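// Sketch of the prompt tool-call convention assumed by the branch above (inferred from the
// parsing logic here; the exact prompt wording lives in promptToolCallMessageRewrite and
// parsePromptToolCall):
//
//   "0: <answer text>"        -> plain answer, streamed via onStreaming
//   "1: <tool call payload>"  -> tool call; buffered and handed to parsePromptToolCall, which
//                                produces ChatCompletionMessageToolCall objects
//   anything else             -> treated as a plain answer and streamed as-is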
export const createCompleteResponse = async ({
body,
response,
onStreaming,
onReasoning,
onToolCall
}: CompleteParams & { response: ChatCompletion }): Promise<CompleteResponse> => {
const { tools, toolCallMode = 'toolChoice', retainDatasetCite = true } = body;
const modelData = getLLMModel(body.model);
const finish_reason = response.choices?.[0]?.finish_reason as CompletionFinishReason;
const usage = response.usage;
// Parse content and reasoning ("think") sections
const { content, reasoningContent } = (() => {
const content = response.choices?.[0]?.message?.content || '';
const reasoningContent: string =
(response.choices?.[0]?.message as any)?.reasoning_content || '';
// The API already returned parsed reasoning content
if (reasoningContent || !modelData.reasoning) {
return {
content,
reasoningContent
};
}
const [think, answer] = parseReasoningContent(content);
return {
content: answer,
reasoningContent: think
};
})();
const formatReasonContent = removeDatasetCiteText(reasoningContent, retainDatasetCite);
let formatContent = removeDatasetCiteText(content, retainDatasetCite);
// Parse tool calls
const { toolCalls } = (() => {
if (tools?.length) {
if (toolCallMode === 'toolChoice') {
return {
toolCalls: response.choices?.[0]?.message?.tool_calls || []
};
}
// Prompt call
const { answer, toolCalls } = parsePromptToolCall(formatContent);
formatContent = answer;
return {
toolCalls
};
}
return {
toolCalls: undefined
};
})();
// Event response
if (formatReasonContent) {
onReasoning?.({ text: formatReasonContent });
}
if (formatContent) {
onStreaming?.({ text: formatContent });
}
if (toolCalls?.length && onToolCall) {
toolCalls.forEach((call) => {
onToolCall({ call });
});
}
return {
error: response.error,
reasoningText: formatReasonContent,
answerText: formatContent,
toolCalls,
finish_reason,
usage
};
};
type CompletionsBodyType =
| ChatCompletionCreateParamsNonStreaming
| ChatCompletionCreateParamsStreaming;
type InferCompletionsBody<T> = T extends { stream: true }
? ChatCompletionCreateParamsStreaming
: T extends { stream: false }
? ChatCompletionCreateParamsNonStreaming
: ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;
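// How the conditional type above narrows the request body (illustrative):
//   InferCompletionsBody<{ stream: true }>  -> ChatCompletionCreateParamsStreaming
//   InferCompletionsBody<{ stream: false }> -> ChatCompletionCreateParamsNonStreaming
//   otherwise                               -> union of both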
type LLMRequestBodyType<T> = Omit<T, 'model' | 'stop' | 'response_format' | 'messages'> & {
model: string | LLMModelItemType;
stop?: string;
response_format?: {
type?: string;
json_schema?: string;
};
messages: ChatCompletionMessageParam[];
// Custom FastGPT fields (stripped before the request is sent)
retainDatasetCite?: boolean;
toolCallMode?: 'toolChoice' | 'prompt';
useVision?: boolean;
requestOrigin?: string;
};
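// Note: response_format.json_schema is passed as a string and parsed with json5 inside
// llmCompletionsBodyFormat below. Illustrative fragment (the schema content is a placeholder):
//
//   response_format: {
//     type: 'json_schema',
//     json_schema: '{ "name": "answer", "schema": { "type": "object" } }'
//   }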
const llmCompletionsBodyFormat = async <T extends CompletionsBodyType>({
retainDatasetCite,
useVision,
requestOrigin,
tools,
tool_choice,
parallel_tool_calls,
toolCallMode,
...body
}: LLMRequestBodyType<T>): Promise<{
requestBody: InferCompletionsBody<T>;
modelData: LLMModelItemType;
}> => {
const modelData = getLLMModel(body.model);
if (!modelData) {
return {
requestBody: body as unknown as InferCompletionsBody<T>,
modelData
};
}
const response_format = (() => {
if (!body.response_format?.type) return undefined;
if (body.response_format.type === 'json_schema') {
try {
return {
type: 'json_schema',
json_schema: json5.parse(body.response_format?.json_schema as unknown as string)
};
} catch (error) {
throw new Error('Json schema error');
}
}
if (body.response_format.type) {
return {
type: body.response_format.type
};
}
return undefined;
})();
const stop = body.stop ?? undefined;
const maxTokens = computedMaxToken({
model: modelData,
maxToken: body.max_tokens || undefined
});
const formatStop = stop?.split('|').filter((item) => !!item.trim());
let requestBody = {
...body,
max_tokens: maxTokens,
model: modelData.model,
temperature:
typeof body.temperature === 'number'
? computedTemperature({
model: modelData,
temperature: body.temperature
})
: undefined,
response_format,
stop: formatStop?.length ? formatStop : undefined,
...(toolCallMode === 'toolChoice' &&
tools?.length && {
tools,
tool_choice,
parallel_tool_calls
})
} as T;
// Filter out undefined/null values
requestBody = Object.fromEntries(
Object.entries(requestBody).filter(([_, value]) => value !== null && value !== undefined)
) as T;
const supportParams = getLLMSupportParams(modelData);
if (!supportParams.temperature) {
delete requestBody.temperature;
}
if (!supportParams.topP) {
delete requestBody.top_p;
}
if (!supportParams.stop) {
delete requestBody.stop;
}
if (!supportParams.responseFormat) {
delete requestBody.response_format;
}
// Field map: rename request fields according to the model config
if (modelData.fieldMap) {
Object.entries(modelData.fieldMap).forEach(([sourceKey, targetKey]) => {
// @ts-ignore
requestBody[targetKey] = body[sourceKey];
// @ts-ignore
delete requestBody[sourceKey];
});
}
requestBody = {
...requestBody,
...modelData?.defaultConfig
};
return {
requestBody: requestBody as unknown as InferCompletionsBody<T>,
modelData
};
};
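// Illustrative example of the fieldMap / defaultConfig handling above (the config values are
// hypothetical): with
//   fieldMap: { max_tokens: 'max_completion_tokens' }
//   defaultConfig: { stream_options: { include_usage: true } }
// the max_tokens value from the incoming body is copied to max_completion_tokens and the
// original key is removed, and defaultConfig entries are merged last, overriding any
// conflicting keys in the request body.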
const createChatCompletion = async ({
modelData,
body,
userKey,
timeout,
options
}: {
modelData: LLMModelItemType;
body: ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;
userKey?: OpenaiAccountType;
timeout?: number;
options?: OpenAI.RequestOptions;
}): Promise<
| {
response: StreamChatType;
isStreamResponse: true;
}
| {
response: UnStreamChatType;
isStreamResponse: false;
}
> => {
try {
if (!modelData) {
return Promise.reject(`${body.model} not found`);
}
body.model = modelData.model;
const formatTimeout = timeout ? timeout : 600000;
const ai = getAIApi({
userKey,
timeout: formatTimeout
});
logger.debug('Start create chat completion', { model: body.model });
const response = await ai.chat.completions.create(body, {
...options,
...(modelData.requestUrl && !userKey ? { path: modelData.requestUrl } : {}),
headers: {
...options?.headers,
...(modelData.requestAuth && !userKey
? { Authorization: `Bearer ${modelData.requestAuth}` }
: {})
}
});
const isStreamResponse =
typeof response === 'object' &&
response !== null &&
('iterator' in response || 'controller' in response);
if (isStreamResponse) {
return {
response,
isStreamResponse: true
};
}
return {
response,
isStreamResponse: false
};
} catch (error) {
if (userKey?.baseUrl) {
logger.warn('User AI API error', {
baseUrl: userKey?.baseUrl,
request: body,
error
});
return Promise.reject(`您的 OpenAI key 出错了: ${getErrText(error)}`);
} else {
logger.error('LLM response error', { request: body, error });
}
return Promise.reject(error);
}
};