Files
FastGPT/packages/service/core/ai/llm/request.ts
Archer 830eb19055 feature: V4.12.2 (#5525)
* feat: favorite apps & quick apps with their own configuration (#5515)

* chore: extract chat history and drawer; fix model selector

* feat: display favourite apps and make it configurable

* feat: favorite apps & quick apps with their own configuration

* fix: fix tab title and add loading state for searching

* fix: cascade delete favorite app and quick app while deleting relative app

* chore: make improvements

* fix: favourite apps ui

* fix: add permission for quick apps

* chore: fix permission & clear redundant code

* perf: chat home page code

* chatbox ui

* fix: 4.12.2-dev (#5520)

* fix: add empty placeholder; fix app quick status; fix tag and layout

* chore: add tab query for the setting tabs

* chore: use `useConfirm` hook instead of `MyModal`

* remove log

* fix: fix modal padding (#5521)

* perf: manage app

* feat: enhance model provider handling and update icon references (#5493)

* perf: model provider

* sdk package

* refactor: create llm response (#5499)

* feat: add LLM response processing functions, including the creation of stream-based and complete responses

* feat: add volta configuration for node and pnpm versions

* refactor: update LLM response handling and event structure in tool choice logic

* feat: update LLM response structure and integrate with tool choice logic

* refactor: clean up imports and remove unused streamResponse function in chat and toolChoice modules

* refactor: rename answer variable to answerBuffer for clarity in LLM response handling

* feat: enhance LLM response handling with tool options and integrate tools into chat and tool choice logic

* refactor: remove volta configuration from package.json

* refactor: reorganize LLM response types and ensure default values for token counts

* refactor: streamline LLM response handling by consolidating response structure and removing redundant checks

* refactor: enhance LLM response handling by consolidating tool options and streamlining event callbacks

* fix: build error

* refactor: update tool type definitions for consistency in tool handling

* feat: llm request function

* fix: ts

* fix: ts

* fix: ahook ts

* fix: variable name

* update lock

* ts version

* doc

* remove log

* fix: translation type

* perf: workflow status check

* fix: ts

* fix: prompt tool call

* fix: fix missing plugin interact window & make tag draggable (#5527)

* fix: incorrect select quick apps state; filter apps type (#5528)

* fix: usesafe translation

* perf: add quickapp modal

---------

Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com>
Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: francis <zhichengfan18@gmail.com>
2025-08-25 19:19:43 +08:00

649 lines
18 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import type {
ChatCompletion,
ChatCompletionCreateParamsNonStreaming,
ChatCompletionCreateParamsStreaming,
ChatCompletionMessageParam,
ChatCompletionMessageToolCall,
CompletionFinishReason,
CompletionUsage,
OpenAI,
StreamChatType,
UnStreamChatType
} from '@fastgpt/global/core/ai/type';
import { computedTemperature, parseLLMStreamResponse, parseReasoningContent } from '../utils';
import { removeDatasetCiteText } from '@fastgpt/global/core/ai/llm/utils';
import { getAIApi } from '../config';
import type { OpenaiAccountType } from '@fastgpt/global/support/user/team/type';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { parsePromptToolCall, promptToolCallMessageRewrite } from './promptToolCall';
import { getLLMModel } from '../model';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
import { loadRequestMessages } from './utils';
import { addLog } from '../../../common/system/log';
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { i18nT } from '../../../../web/i18n/utils';
import { getErrText } from '@fastgpt/global/common/error/utils';
import json5 from 'json5';
/** Optional callbacks fired while an LLM response is being produced. */
type ResponseEvents = {
// Receives answer text: once per parsed chunk when streaming, once with the whole answer otherwise.
onStreaming?: ({ text }: { text: string }) => void;
// Receives reasoning ("thinking") text, delivered the same way as answer text.
onReasoning?: ({ text }: { text: string }) => void;
// Fired once for every tool call that has been recognised.
onToolCall?: ({ call }: { call: ChatCompletionMessageToolCall }) => void;
// Fired with each further argument fragment streamed for a tool call that was already reported.
onToolParam?: ({ tool, params }: { tool: ChatCompletionMessageToolCall; params: string }) => void;
};
/** Arguments of `createLLMResponse`: the request body plus optional account, abort probe, headers and event callbacks. */
type CreateLLMResponseProps<T extends CompletionsBodyType> = {
// Forwarded to `createChatCompletion`, which hands it to `getAIApi`.
userKey?: OpenaiAccountType;
body: LLMRequestBodyType<T>;
// Polled before each stream chunk is processed; a truthy result aborts the stream.
isAborted?: () => boolean | undefined;
// Extra HTTP headers merged over the default `Accept` header (the "custon" spelling is part of the existing interface).
custonHeaders?: Record<string, string>;
} & ResponseEvents;
/** Result returned by `createLLMResponse`. */
type LLMResponse = {
// True when the upstream API answered with a stream.
isStreamResponse: boolean;
answerText: string;
reasoningText: string;
toolCalls?: ChatCompletionMessageToolCall[];
finish_reason: CompletionFinishReason;
// Logs a warning and returns the message to show when the model produced nothing.
getEmptyResponseTip: () => string;
// Token counts as reported by the API, or counted locally when the API reported none.
usage: {
inputTokens: number;
outputTokens: number;
};
// Messages as returned by `loadRequestMessages` (before any prompt-mode rewrite).
requestMessages: ChatCompletionMessageParam[];
// Assistant message(s) built from the answer/reasoning text and the tool calls.
assistantMessage: ChatCompletionMessageParam[];
// `requestMessages` followed by `assistantMessage`.
completeMessages: ChatCompletionMessageParam[];
};
/**
 * Low-level wrapper around an LLM call. It shields callers from the difference
 * between streaming and non-streaming responses, and between the `toolChoice`
 * and `prompt` tool-calling modes.
 *
 * Whatever the mode, tool calls are returned in the `toolChoice` format. Prompt
 * mode works by rewriting the request messages from the `toolChoice`-style tool
 * definitions before the call is made.
 *
 * @param args Request body, optional user key / custom headers, an optional
 *   abort probe and the optional event callbacks.
 * @returns Answer text, reasoning text, tool calls, finish reason, token usage
 *   and the request / assistant / combined message lists.
 */
export const createLLMResponse = async <T extends CompletionsBodyType>(
  args: CreateLLMResponseProps<T>
): Promise<LLMResponse> => {
  const { body, custonHeaders, userKey } = args;
  const { messages, useVision, requestOrigin, tools, toolCallMode } = body;

  // Load the request messages (vision handling and origin are delegated to the helper).
  const requestMessages = await loadRequestMessages({
    messages,
    useVision,
    origin: requestOrigin
  });

  // In prompt mode the tools are injected into the messages instead of being sent natively.
  const rewriteMessages =
    tools?.length && toolCallMode === 'prompt'
      ? promptToolCallMessageRewrite(requestMessages, tools)
      : requestMessages;

  const requestBody = await llmCompletionsBodyFormat({
    ...body,
    messages: rewriteMessages
  });

  const { response, isStreamResponse, getEmptyResponseTip } = await createChatCompletion({
    body: requestBody,
    userKey,
    options: {
      headers: {
        Accept: 'application/json, text/plain, */*',
        ...custonHeaders
      }
    }
  });

  // `isStreamResponse` discriminates the type of `response`, so each branch gets the right parser.
  const { answerText, reasoningText, toolCalls, finish_reason, usage } = isStreamResponse
    ? await createStreamResponse({
        response,
        body,
        isAborted: args.isAborted,
        onStreaming: args.onStreaming,
        onReasoning: args.onReasoning,
        onToolCall: args.onToolCall,
        onToolParam: args.onToolParam
      })
    : await createCompleteResponse({
        response,
        body,
        onStreaming: args.onStreaming,
        onReasoning: args.onReasoning,
        onToolCall: args.onToolCall
      });

  // Text and tool calls are kept as separate assistant messages.
  const assistantMessage: ChatCompletionMessageParam[] = [
    ...(answerText || reasoningText
      ? [
          {
            role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
            content: answerText,
            reasoning_text: reasoningText
          }
        ]
      : []),
    ...(toolCalls?.length
      ? [
          {
            role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
            tool_calls: toolCalls
          }
        ]
      : [])
  ];

  // Usage: trust the API figures when present, otherwise count tokens locally.
  const inputTokens =
    usage?.prompt_tokens ?? (await countGptMessagesTokens(requestBody.messages, requestBody.tools));
  const outputTokens = usage?.completion_tokens ?? (await countGptMessagesTokens(assistantMessage));

  return {
    isStreamResponse,
    getEmptyResponseTip,
    answerText,
    reasoningText,
    toolCalls,
    finish_reason,
    usage: {
      inputTokens,
      outputTokens
    },
    requestMessages,
    assistantMessage,
    completeMessages: [...requestMessages, ...assistantMessage]
  };
};
/** Inputs shared by the stream and non-stream response parsers: the request body and the event callbacks. */
type CompleteParams = Pick<CreateLLMResponseProps<CompletionsBodyType>, 'body'> & ResponseEvents;
/** Output of the response parsers: the parsed text/tool fields plus the raw usage object, which may be absent. */
type CompleteResponse = Pick<
LLMResponse,
'answerText' | 'reasoningText' | 'toolCalls' | 'finish_reason'
> & {
usage?: CompletionUsage;
};
/**
 * Consume a streaming chat completion and fire the event callbacks as chunks arrive.
 *
 * Three paths are handled:
 *  - tools + `toolChoice` mode: tool calls are assembled from the streamed `tool_calls` deltas;
 *  - tools + `prompt` mode: the answer text is buffered until its leading marker can be
 *    inspected, and tool calls are extracted from the full text with `parsePromptToolCall`;
 *  - no tools: text and reasoning are simply forwarded.
 *
 * Before each chunk is processed `isAborted` is polled; when it returns a truthy value the
 * stream controller is aborted and the finish reason is set to `'close'`.
 *
 * @param body Request body (`model`, `tools`, `toolCallMode`, `retainDatasetCite` are read).
 * @param response The stream returned by the chat completion API.
 * @returns The accumulated answer text, reasoning text, finish reason, usage and tool calls.
 */
export const createStreamResponse = async ({
  body,
  response,
  isAborted,
  onStreaming,
  onReasoning,
  onToolCall,
  onToolParam
}: CompleteParams & {
  response: StreamChatType;
  isAborted?: () => boolean | undefined;
}): Promise<CompleteResponse> => {
  const { retainDatasetCite = true, tools, toolCallMode = 'toolChoice', model } = body;
  const modelData = getLLMModel(model);
  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

  if (tools?.length) {
    if (toolCallMode === 'toolChoice') {
      // Tool whose name has been received but which has not been matched against `tools` yet.
      let callingTool: ChatCompletionMessageToolCall['function'] | null = null;
      const toolCalls: ChatCompletionMessageToolCall[] = [];

      for await (const part of response) {
        if (isAborted?.()) {
          response.controller?.abort();
          updateFinishReason('close');
          break;
        }

        const { reasoningContent, responseContent } = parsePart({
          part,
          parseThinkTag: modelData.reasoning,
          retainDatasetCite
        });

        if (reasoningContent) {
          onReasoning?.({ text: reasoningContent });
        }
        if (responseContent) {
          onStreaming?.({ text: responseContent });
        }

        const responseChoice = part.choices?.[0]?.delta;

        // Parse tool calls
        if (responseChoice?.tool_calls?.length) {
          responseChoice.tool_calls.forEach((toolCall, i) => {
            const index = toolCall.index ?? i;

            // Either this delta names a tool, or a previously named tool is still unmatched.
            const hasNewTool = toolCall?.function?.name || callingTool;
            if (hasNewTool) {
              if (toolCall?.function?.name) {
                // A delta carrying a name starts a fresh pending tool.
                callingTool = {
                  name: toolCall.function?.name || '',
                  arguments: toolCall.function?.arguments || ''
                };
              } else if (callingTool) {
                // Continue call (perhaps the name of the previous function was incomplete)
                callingTool.name += toolCall.function?.name || '';
                callingTool.arguments += toolCall.function?.arguments || '';
              }

              // Only once the pending name matches a declared tool is the call published.
              if (tools.find((item) => item.function.name === callingTool!.name)) {
                const call: ChatCompletionMessageToolCall = {
                  id: getNanoid(),
                  type: 'function',
                  function: callingTool!
                };
                toolCalls.push(call);
                onToolCall?.({ call });
                callingTool = null;
              }
            } else {
              // Append the argument fragment to the tool at this index.
              const arg: string = toolCall?.function?.arguments ?? '';
              const currentTool = toolCalls[index];
              if (currentTool && arg) {
                currentTool.function.arguments += arg;
                onToolParam?.({ tool: currentTool, params: arg });
              }
            }
          });
        }
      }

      const { reasoningContent, content, finish_reason, usage } = getResponseData();

      return {
        answerText: content,
        reasoningText: reasoningContent,
        finish_reason,
        usage,
        toolCalls
      };
    } else {
      // Prompt mode. The leading marker of the answer decides whether it is plain text
      // ("0" + colon) or a tool call ("1" + colon). The colon may be ASCII ":" or the
      // full-width "：" (U+FF1A); the latter is written as an escape so it cannot be lost
      // or confused with the ASCII one.
      const plainAnswerPrefix = /0(:|\uFF1A)/;
      const toolCallPrefix = /1(:|\uFF1A)/;

      let startResponseWrite = false;
      let answer = '';

      for await (const part of response) {
        if (isAborted?.()) {
          response.controller?.abort();
          updateFinishReason('close');
          break;
        }

        const { reasoningContent, content, responseContent } = parsePart({
          part,
          parseThinkTag: modelData.reasoning,
          retainDatasetCite
        });
        answer += content;

        if (reasoningContent) {
          onReasoning?.({ text: reasoningContent });
        }

        if (content) {
          if (startResponseWrite) {
            if (responseContent) {
              onStreaming?.({ text: responseContent });
            }
          } else if (answer.length >= 3) {
            // Enough text has been buffered to inspect the marker.
            answer = answer.trimStart();

            if (plainAnswerPrefix.test(answer)) {
              // Not a tool call: strip the marker and start streaming.
              startResponseWrite = true;

              // Prefer the ASCII-colon marker, fall back to the full-width one.
              const firstIndex =
                answer.indexOf('0:') !== -1 ? answer.indexOf('0:') : answer.indexOf('0\uFF1A');
              answer = answer.substring(firstIndex + 2).trim();

              onStreaming?.({ text: answer });
            } else if (toolCallPrefix.test(answer)) {
              // Tool call: nothing is streamed to the caller; the full text is parsed after the loop.
            } else {
              // No marker at all: treat everything buffered so far as the answer.
              startResponseWrite = true;
              onStreaming?.({ text: answer });
            }
          }
        }
      }

      const { reasoningContent, content, finish_reason, usage } = getResponseData();
      const { answer: llmAnswer, toolCalls } = parsePromptToolCall(content);

      toolCalls?.forEach((call) => {
        onToolCall?.({ call });
      });

      return {
        answerText: llmAnswer,
        reasoningText: reasoningContent,
        finish_reason,
        usage,
        toolCalls
      };
    }
  } else {
    // No tools: forward reasoning and answer text as they arrive.
    for await (const part of response) {
      if (isAborted?.()) {
        response.controller?.abort();
        updateFinishReason('close');
        break;
      }

      const { reasoningContent, responseContent } = parsePart({
        part,
        parseThinkTag: modelData.reasoning,
        retainDatasetCite
      });

      if (reasoningContent) {
        onReasoning?.({ text: reasoningContent });
      }
      if (responseContent) {
        onStreaming?.({ text: responseContent });
      }
    }

    const { reasoningContent, content, finish_reason, usage } = getResponseData();

    return {
      answerText: content,
      reasoningText: reasoningContent,
      finish_reason,
      usage
    };
  }
};
/**
 * Turn a non-streaming chat completion into the common response shape and fire
 * each event callback at most once per item (reasoning, answer, every tool call).
 *
 * @param body Request body (`model`, `tools`, `toolCallMode`, `retainDatasetCite` are read).
 * @param response The complete chat completion returned by the API.
 * @returns Reasoning text, answer text, tool calls, finish reason and the raw usage object.
 */
export const createCompleteResponse = async ({
  body,
  response,
  onStreaming,
  onReasoning,
  onToolCall
}: CompleteParams & { response: ChatCompletion }): Promise<CompleteResponse> => {
  const { tools, toolCallMode = 'toolChoice', retainDatasetCite = true } = body;
  const modelData = getLLMModel(body.model);

  const firstChoice = response.choices?.[0];
  const message = firstChoice?.message;
  const finish_reason = firstChoice?.finish_reason as CompletionFinishReason;
  const usage = response.usage;

  // Split answer and reasoning. When the API already supplied reasoning, or the
  // model is not flagged as a reasoning model, the content is used untouched.
  let rawAnswer = message?.content || '';
  let rawReasoning: string = (message as any)?.reasoning_content || '';
  if (!rawReasoning && modelData.reasoning) {
    const [think, answer] = parseReasoningContent(rawAnswer);
    rawReasoning = think;
    rawAnswer = answer;
  }

  const reasoningText = removeDatasetCiteText(rawReasoning, retainDatasetCite);
  let answerText = removeDatasetCiteText(rawAnswer, retainDatasetCite);

  // Tool calls: native ones come from the message; in prompt mode they are
  // parsed out of the answer text, which is replaced by the remaining answer.
  let toolCalls: CompleteResponse['toolCalls'];
  if (tools?.length) {
    if (toolCallMode === 'toolChoice') {
      toolCalls = message?.tool_calls || [];
    } else {
      const parsed = parsePromptToolCall(answerText);
      answerText = parsed.answer;
      toolCalls = parsed.toolCalls;
    }
  }

  // Emit events
  if (reasoningText) {
    onReasoning?.({ text: reasoningText });
  }
  if (answerText) {
    onStreaming?.({ text: answerText });
  }
  if (toolCalls?.length && onToolCall) {
    for (const call of toolCalls) {
      onToolCall({ call });
    }
  }

  return {
    reasoningText,
    answerText,
    toolCalls,
    finish_reason,
    usage
  };
};
/** Either flavour of chat-completion request parameters (non-streaming or streaming). */
type CompletionsBodyType =
| ChatCompletionCreateParamsNonStreaming
| ChatCompletionCreateParamsStreaming;
/**
 * Picks the request-parameter type that matches the literal type of `stream` in `T`;
 * when `stream` is not a known literal the union of both types is produced.
 */
type InferCompletionsBody<T> = T extends { stream: true }
? ChatCompletionCreateParamsStreaming
: T extends { stream: false }
? ChatCompletionCreateParamsNonStreaming
: ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;
/**
 * Request body accepted by this module: the SDK parameters `T` with a few fields
 * loosened, plus custom fields that are consumed locally.
 */
type LLMRequestBodyType<T> = Omit<T, 'model' | 'stop' | 'response_format' | 'messages'> & {
// Model name or an already-resolved model item; resolved through `getLLMModel`.
model: string | LLMModelItemType;
// Stop sequences as one string; split on '|' when the request body is formatted.
stop?: string;
response_format?: {
type?: string;
// JSON schema as text; parsed with json5 when `type` is 'json_schema'.
json_schema?: string;
};
messages: ChatCompletionMessageParam[];
// Custom fields (removed from the body before it is sent)
// Passed to the response parsers' citation handling; the parsers default it to true.
retainDatasetCite?: boolean;
reasoning?: boolean; // Whether to return reasoning content
// How tools are exposed to the model; the response parsers treat an omitted value as 'toolChoice'.
toolCallMode?: 'toolChoice' | 'prompt';
// Forwarded to `loadRequestMessages`.
useVision?: boolean;
// Forwarded to `loadRequestMessages` as `origin`.
requestOrigin?: string;
};
/**
 * Convert the module's request body into the parameters sent to the chat completion API.
 *
 * - Custom fields (`reasoning`, `retainDatasetCite`, `useVision`, `requestOrigin`,
 *   `toolCallMode`) are removed.
 * - `model` is replaced by the resolved model's name, `temperature` is run through
 *   `computedTemperature`, `stop` is split on '|', and a `json_schema` response format
 *   is parsed from text.
 * - The model's `defaultConfig` is spread over the body, then `response_format`, `stop`
 *   and the tool fields are applied on top of it.
 * - `tools`, `tool_choice` and `parallel_tool_calls` are forwarded only in `toolChoice`
 *   mode. An omitted `toolCallMode` is treated as `'toolChoice'`, matching the default
 *   used by `createStreamResponse` and `createCompleteResponse`; otherwise tools passed
 *   without an explicit mode would never reach the model.
 * - Finally the model's `fieldMap` renames fields.
 *
 * When the model cannot be resolved, the remaining body is returned as-is.
 *
 * @throws Error('Json schema error') when the JSON schema text cannot be parsed.
 */
const llmCompletionsBodyFormat = async <T extends CompletionsBodyType>({
  reasoning,
  retainDatasetCite,
  useVision,
  requestOrigin,
  tools,
  tool_choice,
  parallel_tool_calls,
  toolCallMode = 'toolChoice',
  ...body
}: LLMRequestBodyType<T>): Promise<InferCompletionsBody<T>> => {
  const modelData = getLLMModel(body.model);
  if (!modelData) {
    return body as unknown as InferCompletionsBody<T>;
  }

  const response_format = (() => {
    const formatType = body.response_format?.type;
    if (!formatType) return undefined;

    if (formatType === 'json_schema') {
      try {
        return {
          type: 'json_schema',
          json_schema: json5.parse(body.response_format?.json_schema as unknown as string)
        };
      } catch (error) {
        throw new Error('Json schema error');
      }
    }

    // Any other non-empty type is passed through without a schema.
    return {
      type: formatType
    };
  })();

  const stop = body.stop ?? undefined;

  const requestBody = {
    ...body,
    model: modelData.model,
    temperature:
      typeof body.temperature === 'number'
        ? computedTemperature({
            model: modelData,
            temperature: body.temperature
          })
        : undefined,
    ...modelData?.defaultConfig,
    response_format,
    stop: stop?.split('|'),
    ...(toolCallMode === 'toolChoice' && {
      tools,
      tool_choice,
      parallel_tool_calls
    })
  } as T;

  // Field map: copy the caller-supplied value to the target key and drop the source key.
  if (modelData.fieldMap) {
    Object.entries(modelData.fieldMap).forEach(([sourceKey, targetKey]) => {
      // @ts-ignore
      requestBody[targetKey] = body[sourceKey];
      // @ts-ignore
      delete requestBody[sourceKey];
    });
  }

  return requestBody as unknown as InferCompletionsBody<T>;
};
/**
 * Send the chat completion request and report whether the API answered with a stream.
 *
 * The model is resolved from `modelData` or, failing that, from `body.model`; `body.model`
 * is then overwritten with the resolved model name. A model-specific request URL and
 * bearer token, when configured, override the request path and `Authorization` header.
 *
 * @returns The raw response, an `isStreamResponse` discriminant and `getEmptyResponseTip`,
 *   which logs a warning and returns the message to use when the response is empty.
 *   The promise rejects with a string when the model is unknown, with a string describing
 *   the error when a user-supplied base URL is in use, and with the original error otherwise.
 */
const createChatCompletion = async ({
  modelData,
  body,
  userKey,
  timeout,
  options
}: {
  modelData?: LLMModelItemType;
  body: ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;
  userKey?: OpenaiAccountType;
  timeout?: number;
  options?: OpenAI.RequestOptions;
}): Promise<
  {
    getEmptyResponseTip: () => string;
  } & (
    | {
        response: StreamChatType;
        isStreamResponse: true;
      }
    | {
        response: UnStreamChatType;
        isStreamResponse: false;
      }
  )
> => {
  try {
    // Resolve the model and rewrite the name sent upstream
    const resolvedModel = modelData || getLLMModel(body.model);
    if (!resolvedModel) {
      return Promise.reject(`${body.model} not found`);
    }
    body.model = resolvedModel.model;

    // A missing or zero timeout falls back to 600000 ms
    const ai = getAIApi({
      userKey,
      timeout: timeout || 600000
    });

    addLog.debug(`Start create chat completion`, {
      model: body.model
    });

    // Per-model overrides for the endpoint path and the auth header
    const pathOverride = resolvedModel.requestUrl ? { path: resolvedModel.requestUrl } : {};
    const authHeader = resolvedModel.requestAuth
      ? { Authorization: `Bearer ${resolvedModel.requestAuth}` }
      : {};

    const response = await ai.chat.completions.create(body, {
      ...options,
      ...pathOverride,
      headers: {
        ...options?.headers,
        ...authHeader
      }
    });

    // A stream object exposes an `iterator` or a `controller` member
    const isStreamResponse =
      typeof response === 'object' &&
      response !== null &&
      ('iterator' in response || 'controller' in response);

    const getEmptyResponseTip = () => {
      addLog.warn(`LLM response empty`, {
        baseUrl: userKey?.baseUrl,
        requestBody: body
      });
      return userKey?.baseUrl
        ? `您的 OpenAI key 没有响应: ${JSON.stringify(body)}`
        : i18nT('chat:LLM_model_response_empty');
    };

    if (isStreamResponse) {
      return {
        response,
        isStreamResponse: true,
        getEmptyResponseTip
      };
    }

    return {
      response,
      isStreamResponse: false,
      getEmptyResponseTip
    };
  } catch (error) {
    addLog.error(`LLM response error`, error);
    addLog.warn(`LLM response error`, {
      baseUrl: userKey?.baseUrl,
      requestBody: body
    });

    // With a user-supplied endpoint, surface a readable message instead of the raw error
    if (userKey?.baseUrl) {
      return Promise.reject(`您的 OpenAI key 出错了: ${getErrText(error)}`);
    }
    return Promise.reject(error);
  }
};