FastGPT/packages/service/core/ai/llm/request.ts
Last commit 051455238c: V4.13.0 features (#5693), Archer, 2025-09-24 22:40:31 +08:00


import type {
  ChatCompletion,
  ChatCompletionCreateParamsNonStreaming,
  ChatCompletionCreateParamsStreaming,
  ChatCompletionMessageParam,
  ChatCompletionMessageToolCall,
  CompletionFinishReason,
  CompletionUsage,
  OpenAI,
  StreamChatType,
  UnStreamChatType
} from '@fastgpt/global/core/ai/type';
import { computedTemperature, parseLLMStreamResponse, parseReasoningContent } from '../utils';
import { removeDatasetCiteText } from '@fastgpt/global/core/ai/llm/utils';
import { getAIApi } from '../config';
import type { OpenaiAccountType } from '@fastgpt/global/support/user/team/type';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { parsePromptToolCall, promptToolCallMessageRewrite } from './promptToolCall';
import { getLLMModel } from '../model';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
import { loadRequestMessages } from './utils';
import { addLog } from '../../../common/system/log';
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { i18nT } from '../../../../web/i18n/utils';
import { getErrText } from '@fastgpt/global/common/error/utils';
import json5 from 'json5';

type ResponseEvents = {
  onStreaming?: ({ text }: { text: string }) => void;
  onReasoning?: ({ text }: { text: string }) => void;
  onToolCall?: ({ call }: { call: ChatCompletionMessageToolCall }) => void;
  onToolParam?: ({ tool, params }: { tool: ChatCompletionMessageToolCall; params: string }) => void;
};

type CreateLLMResponseProps<T extends CompletionsBodyType> = {
  userKey?: OpenaiAccountType;
  body: LLMRequestBodyType<T>;
  isAborted?: () => boolean | undefined;
  custonHeaders?: Record<string, string>;
} & ResponseEvents;

type LLMResponse = {
  isStreamResponse: boolean;
  answerText: string;
  reasoningText: string;
  toolCalls?: ChatCompletionMessageToolCall[];
  finish_reason: CompletionFinishReason;
  getEmptyResponseTip: () => string;
  usage: {
    inputTokens: number;
    outputTokens: number;
  };
  requestMessages: ChatCompletionMessageParam[];
  assistantMessage: ChatCompletionMessageParam[];
  completeMessages: ChatCompletionMessageParam[];
};

/*
  Low-level wrapper around LLM calls. It shields the layers above from the difference between
  stream and non-stream responses, and between the toolChoice and promptTool calling modes.
  Tool calls are always stored in the toolChoice format; in promptTool mode the toolChoice
  structure is rewritten into dedicated messages before the request is made.
*/
export const createLLMResponse = async <T extends CompletionsBodyType>(
  args: CreateLLMResponseProps<T>
): Promise<LLMResponse> => {
  const { body, custonHeaders, userKey } = args;
  const { messages, useVision, requestOrigin, tools, toolCallMode } = body;

  // Messages process
  const requestMessages = await loadRequestMessages({
    messages,
    useVision,
    origin: requestOrigin
  });

  // Message process
  const rewriteMessages = (() => {
    if (tools?.length && toolCallMode === 'prompt') {
      return promptToolCallMessageRewrite(requestMessages, tools);
    }
    return requestMessages;
  })();

  const requestBody = await llmCompletionsBodyFormat({
    ...body,
    messages: rewriteMessages
  });
  // console.log(JSON.stringify(requestBody, null, 2));

  const { response, isStreamResponse, getEmptyResponseTip } = await createChatCompletion({
    body: requestBody,
    userKey,
    options: {
      headers: {
        Accept: 'application/json, text/plain, */*',
        ...custonHeaders
      }
    }
  });

  const { answerText, reasoningText, toolCalls, finish_reason, usage } = await (async () => {
    if (isStreamResponse) {
      return createStreamResponse({
        response,
        body,
        isAborted: args.isAborted,
        onStreaming: args.onStreaming,
        onReasoning: args.onReasoning,
        onToolCall: args.onToolCall,
        onToolParam: args.onToolParam
      });
    } else {
      return createCompleteResponse({
        response,
        body,
        onStreaming: args.onStreaming,
        onReasoning: args.onReasoning,
        onToolCall: args.onToolCall
      });
    }
  })();

  const assistantMessage: ChatCompletionMessageParam[] = [
    ...(answerText || reasoningText
      ? [
          {
            role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
            content: answerText,
            reasoning_text: reasoningText
          }
        ]
      : []),
    ...(toolCalls?.length
      ? [
          {
            role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
            tool_calls: toolCalls
          }
        ]
      : [])
  ];

  // Usage count
  const inputTokens =
    usage?.prompt_tokens || (await countGptMessagesTokens(requestBody.messages, requestBody.tools));
  const outputTokens = usage?.completion_tokens || (await countGptMessagesTokens(assistantMessage));

  return {
    isStreamResponse,
    getEmptyResponseTip,
    answerText,
    reasoningText,
    toolCalls,
    finish_reason,
    usage: {
      inputTokens,
      outputTokens
    },
    requestMessages,
    assistantMessage,
    completeMessages: [...requestMessages, ...assistantMessage]
  };
};
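/*
  Illustrative usage sketch for createLLMResponse. The model id 'gpt-4o-mini' and the callback
  bodies below are hypothetical, assuming such a model is registered in the model config so that
  getLLMModel can resolve it; the surrounding FastGPT services normally supply these values.

    const { answerText, usage, completeMessages } = await createLLMResponse({
      body: {
        model: 'gpt-4o-mini',
        temperature: 0.2,
        stream: true,
        messages: [
          { role: 'system', content: 'You are a helpful assistant.' },
          { role: 'user', content: 'Summarize the release notes in one sentence.' }
        ]
      },
      // Stream deltas as they arrive
      onStreaming: ({ text }) => process.stdout.write(text),
      // Reasoning ("think") chunks are surfaced separately from the answer
      onReasoning: ({ text }) => addLog.debug('reasoning chunk', { text })
    });

    // answerText holds the concatenated answer, usage the input/output token counts, and
    // completeMessages the request messages plus the assistant reply, ready to persist.
*/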
type CompleteParams = Pick<CreateLLMResponseProps<CompletionsBodyType>, 'body'> & ResponseEvents;
type CompleteResponse = Pick<
  LLMResponse,
  'answerText' | 'reasoningText' | 'toolCalls' | 'finish_reason'
> & {
  usage?: CompletionUsage;
};

export const createStreamResponse = async ({
  body,
  response,
  isAborted,
  onStreaming,
  onReasoning,
  onToolCall,
  onToolParam
}: CompleteParams & {
  response: StreamChatType;
  isAborted?: () => boolean | undefined;
}): Promise<CompleteResponse> => {
  const { retainDatasetCite = true, tools, toolCallMode = 'toolChoice', model } = body;
  const modelData = getLLMModel(model);

  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

  if (tools?.length) {
    if (toolCallMode === 'toolChoice') {
      let callingTool: ChatCompletionMessageToolCall['function'] | null = null;
      const toolCalls: ChatCompletionMessageToolCall[] = [];

      for await (const part of response) {
        if (isAborted?.()) {
          response.controller?.abort();
          updateFinishReason('close');
          break;
        }

        const { reasoningContent, responseContent } = parsePart({
          part,
          parseThinkTag: modelData.reasoning,
          retainDatasetCite
        });

        if (reasoningContent) {
          onReasoning?.({ text: reasoningContent });
        }
        if (responseContent) {
          onStreaming?.({ text: responseContent });
        }

        const responseChoice = part.choices?.[0]?.delta;

        // Parse tool calls
        if (responseChoice?.tool_calls?.length) {
          responseChoice.tool_calls.forEach((toolCall, i) => {
            const index = toolCall.index ?? i;

            // Call new tool
            const hasNewTool = toolCall?.function?.name || callingTool;
            if (hasNewTool) {
              // Call new tool
              if (toolCall?.function?.name) {
                callingTool = {
                  name: toolCall.function?.name || '',
                  arguments: toolCall.function?.arguments || ''
                };
              } else if (callingTool) {
                // Continue call (perhaps the name of the previous function was incomplete)
                callingTool.name += toolCall.function?.name || '';
                callingTool.arguments += toolCall.function?.arguments || '';
              }

              // New tool, add to list.
              if (tools.find((item) => item.function.name === callingTool!.name)) {
                const call: ChatCompletionMessageToolCall = {
                  id: getNanoid(),
                  type: 'function',
                  function: callingTool!
                };
                toolCalls.push(call);
                onToolCall?.({ call });
                callingTool = null;
              }
            } else {
              // Append this argument fragment to the current tool's arguments
              const arg: string = toolCall?.function?.arguments ?? '';
              const currentTool = toolCalls[index];
              if (currentTool && arg) {
                currentTool.function.arguments += arg;
                onToolParam?.({ tool: currentTool, params: arg });
              }
            }
          });
        }
      }

      const { reasoningContent, content, finish_reason, usage } = getResponseData();

      return {
        answerText: content,
        reasoningText: reasoningContent,
        finish_reason,
        usage,
        toolCalls
      };
    } else {
      let startResponseWrite = false;
      let answer = '';

      for await (const part of response) {
        if (isAborted?.()) {
          response.controller?.abort();
          updateFinishReason('close');
          break;
        }

        const { reasoningContent, content, responseContent } = parsePart({
          part,
          parseThinkTag: modelData.reasoning,
          retainDatasetCite
        });
        answer += content;

        if (reasoningContent) {
          onReasoning?.({ text: reasoningContent });
        }

        if (content) {
          if (startResponseWrite) {
            if (responseContent) {
              onStreaming?.({ text: responseContent });
            }
          } else if (answer.length >= 3) {
            answer = answer.trimStart();

            // Not call tool
            if (/0(:|)/.test(answer)) {
              startResponseWrite = true;
              // find first : index
              const firstIndex =
                answer.indexOf('0:') !== -1 ? answer.indexOf('0:') : answer.indexOf('0');
              answer = answer.substring(firstIndex + 2).trim();
              onStreaming?.({ text: answer });
            }
            // Not response tool
            else if (/1(:|)/.test(answer)) {
            }
            // Not start 1/0, start response
            else {
              startResponseWrite = true;
              onStreaming?.({ text: answer });
            }
          }
        }
      }

      const { reasoningContent, content, finish_reason, usage } = getResponseData();
      const { answer: llmAnswer, streamAnswer, toolCalls } = parsePromptToolCall(content);

      if (streamAnswer) {
        onStreaming?.({ text: streamAnswer });
      }

      toolCalls?.forEach((call) => {
        onToolCall?.({ call });
      });

      return {
        answerText: llmAnswer,
        reasoningText: reasoningContent,
        finish_reason,
        usage,
        toolCalls
      };
    }
  } else {
    // Not use tool
    for await (const part of response) {
      if (isAborted?.()) {
        response.controller?.abort();
        updateFinishReason('close');
        break;
      }

      const { reasoningContent, responseContent } = parsePart({
        part,
        parseThinkTag: modelData.reasoning,
        retainDatasetCite
      });

      if (reasoningContent) {
        onReasoning?.({ text: reasoningContent });
      }
      if (responseContent) {
        onStreaming?.({ text: responseContent });
      }
    }

    const { reasoningContent, content, finish_reason, usage } = getResponseData();

    return {
      answerText: content,
      reasoningText: reasoningContent,
      finish_reason,
      usage
    };
  }
};
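/*
  Rough sketch of the prompt-mode ('toolCallMode: prompt') stream convention as inferred from the
  branch above. The authoritative format is defined by promptToolCallMessageRewrite and
  parsePromptToolCall, so treat the exact prefixes below as an assumption:

    "0: <answer text...>"            -> the model is answering directly; everything after the
                                        prefix is streamed to onStreaming as normal content.
    "1: {\"name\":\"search\", ...}"  -> the model is emitting a tool call; streaming is suppressed
                                        and the accumulated content is handed to parsePromptToolCall
                                        when the stream ends, which yields toolCalls.

  Content that starts with neither prefix is treated as a plain answer and streamed as-is.
*/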
export const createCompleteResponse = async ({
  body,
  response,
  onStreaming,
  onReasoning,
  onToolCall
}: CompleteParams & { response: ChatCompletion }): Promise<CompleteResponse> => {
  const { tools, toolCallMode = 'toolChoice', retainDatasetCite = true } = body;
  const modelData = getLLMModel(body.model);

  const finish_reason = response.choices?.[0]?.finish_reason as CompletionFinishReason;
  const usage = response.usage;

  // Content and think parse
  const { content, reasoningContent } = (() => {
    const content = response.choices?.[0]?.message?.content || '';
    const reasoningContent: string =
      (response.choices?.[0]?.message as any)?.reasoning_content || '';

    // API already parse reasoning content
    if (reasoningContent || !modelData.reasoning) {
      return {
        content,
        reasoningContent
      };
    }

    const [think, answer] = parseReasoningContent(content);
    return {
      content: answer,
      reasoningContent: think
    };
  })();

  const formatReasonContent = removeDatasetCiteText(reasoningContent, retainDatasetCite);
  let formatContent = removeDatasetCiteText(content, retainDatasetCite);

  // Tool parse
  const { toolCalls } = (() => {
    if (tools?.length) {
      if (toolCallMode === 'toolChoice') {
        return {
          toolCalls: response.choices?.[0]?.message?.tool_calls || []
        };
      }

      // Prompt call
      const { answer, toolCalls } = parsePromptToolCall(formatContent);
      formatContent = answer;
      return {
        toolCalls
      };
    }
    return {
      toolCalls: undefined
    };
  })();

  // Event response
  if (formatReasonContent) {
    onReasoning?.({ text: formatReasonContent });
  }
  if (formatContent) {
    onStreaming?.({ text: formatContent });
  }
  if (toolCalls?.length && onToolCall) {
    toolCalls.forEach((call) => {
      onToolCall({ call });
    });
  }

  return {
    reasoningText: formatReasonContent,
    answerText: formatContent,
    toolCalls,
    finish_reason,
    usage
  };
};
type CompletionsBodyType =
  | ChatCompletionCreateParamsNonStreaming
  | ChatCompletionCreateParamsStreaming;
type InferCompletionsBody<T> = T extends { stream: true }
  ? ChatCompletionCreateParamsStreaming
  : T extends { stream: false }
    ? ChatCompletionCreateParamsNonStreaming
    : ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;

type LLMRequestBodyType<T> = Omit<T, 'model' | 'stop' | 'response_format' | 'messages'> & {
  model: string | LLMModelItemType;
  stop?: string;
  response_format?: {
    type?: string;
    json_schema?: string;
  };
  messages: ChatCompletionMessageParam[];

  // Custom field
  retainDatasetCite?: boolean;
  toolCallMode?: 'toolChoice' | 'prompt';
  useVision?: boolean;
  requestOrigin?: string;
};

const llmCompletionsBodyFormat = async <T extends CompletionsBodyType>({
  retainDatasetCite,
  useVision,
  requestOrigin,
  tools,
  tool_choice,
  parallel_tool_calls,
  toolCallMode,
  ...body
}: LLMRequestBodyType<T>): Promise<InferCompletionsBody<T>> => {
  const modelData = getLLMModel(body.model);
  if (!modelData) {
    return body as unknown as InferCompletionsBody<T>;
  }

  const response_format = (() => {
    if (!body.response_format?.type) return undefined;

    if (body.response_format.type === 'json_schema') {
      try {
        return {
          type: 'json_schema',
          json_schema: json5.parse(body.response_format?.json_schema as unknown as string)
        };
      } catch (error) {
        throw new Error('Json schema error');
      }
    }
    if (body.response_format.type) {
      return {
        type: body.response_format.type
      };
    }

    return undefined;
  })();
  const stop = body.stop ?? undefined;

  const requestBody = {
    ...body,
    model: modelData.model,
    temperature:
      typeof body.temperature === 'number'
        ? computedTemperature({
            model: modelData,
            temperature: body.temperature
          })
        : undefined,
    ...modelData?.defaultConfig,
    response_format,
    stop: stop?.split('|'),
    ...(toolCallMode === 'toolChoice' && {
      tools,
      tool_choice,
      parallel_tool_calls
    })
  } as T;

  // field map
  if (modelData.fieldMap) {
    Object.entries(modelData.fieldMap).forEach(([sourceKey, targetKey]) => {
      // @ts-ignore
      requestBody[targetKey] = body[sourceKey];
      // @ts-ignore
      delete requestBody[sourceKey];
    });
  }

  return requestBody as unknown as InferCompletionsBody<T>;
};
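/*
  Hypothetical before/after sketch of llmCompletionsBodyFormat. The fieldMap and defaultConfig
  entries below are made-up model-config values, shown only to illustrate the mapping performed
  above; real values come from the system model configuration.

    input body:  { model: 'my-model', temperature: 0.3, stop: '###|<END>', stream: true, messages }
    model data:  { model: 'my-model', fieldMap: { max_tokens: 'max_completion_tokens' }, defaultConfig: {} }

    output body: {
      model: 'my-model',                       // resolved via getLLMModel
      temperature: computedTemperature(...),   // rescaled per model limits
      stop: ['###', '<END>'],                  // the pipe-separated stop string is split into an array
      stream: true,
      messages
      // tools / tool_choice / parallel_tool_calls are only attached when toolCallMode === 'toolChoice'
    }

  If the body carried max_tokens, fieldMap would copy its value to max_completion_tokens and delete
  the original key before the request is sent.
*/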
const createChatCompletion = async ({
  modelData,
  body,
  userKey,
  timeout,
  options
}: {
  modelData?: LLMModelItemType;
  body: ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;
  userKey?: OpenaiAccountType;
  timeout?: number;
  options?: OpenAI.RequestOptions;
}): Promise<
  {
    getEmptyResponseTip: () => string;
  } & (
    | {
        response: StreamChatType;
        isStreamResponse: true;
      }
    | {
        response: UnStreamChatType;
        isStreamResponse: false;
      }
  )
> => {
  try {
    // Rewrite model
    const modelConstantsData = modelData || getLLMModel(body.model);
    if (!modelConstantsData) {
      return Promise.reject(`${body.model} not found`);
    }
    body.model = modelConstantsData.model;

    const formatTimeout = timeout ? timeout : 600000;
    const ai = getAIApi({
      userKey,
      timeout: formatTimeout
    });

    addLog.debug(`Start create chat completion`, {
      model: body.model
    });

    const response = await ai.chat.completions.create(body, {
      ...options,
      ...(modelConstantsData.requestUrl ? { path: modelConstantsData.requestUrl } : {}),
      headers: {
        ...options?.headers,
        ...(modelConstantsData.requestAuth
          ? { Authorization: `Bearer ${modelConstantsData.requestAuth}` }
          : {})
      }
    });

    const isStreamResponse =
      typeof response === 'object' &&
      response !== null &&
      ('iterator' in response || 'controller' in response);

    const getEmptyResponseTip = () => {
      addLog.warn(`LLM response empty`, {
        baseUrl: userKey?.baseUrl,
        requestBody: body
      });
      if (userKey?.baseUrl) {
        return `您的 OpenAI key 没有响应: ${JSON.stringify(body)}`;
      }
      return i18nT('chat:LLM_model_response_empty');
    };

    if (isStreamResponse) {
      return {
        response,
        isStreamResponse: true,
        getEmptyResponseTip
      };
    }

    return {
      response,
      isStreamResponse: false,
      getEmptyResponseTip
    };
  } catch (error) {
    addLog.error(`LLM response error`, error);
    addLog.warn(`LLM response error`, {
      baseUrl: userKey?.baseUrl,
      requestBody: body
    });
    if (userKey?.baseUrl) {
      return Promise.reject(`您的 OpenAI key 出错了: ${getErrText(error)}`);
    }
    return Promise.reject(error);
  }
};