mirror of https://github.com/labring/FastGPT.git
synced 2026-05-05 01:02:59 +08:00

feature: V4.12.2 (#5525)

* feat: favorite apps & quick apps with their own configuration (#5515)
* chore: extract chat history and drawer; fix model selector
* feat: display favourite apps and make it configurable
* feat: favorite apps & quick apps with their own configuration
* fix: fix tab title and add loading state for searching
* fix: cascade delete favorite app and quick app while deleting relative app
* chore: make improvements
* fix: favourite apps ui
* fix: add permission for quick apps
* chore: fix permission & clear redundant code
* perf: chat home page code
* chatbox ui
* fix: 4.12.2-dev (#5520)
* fix: add empty placeholder; fix app quick status; fix tag and layout
* chore: add tab query for the setting tabs
* chore: use `useConfirm` hook instead of `MyModal`
* remove log
* fix: fix modal padding (#5521)
* perf: manage app
* feat: enhance model provider handling and update icon references (#5493)
* perf: model provider
* sdk package
* refactor: create llm response (#5499)
* feat: add LLM response processing functions, including the creation of stream-based and complete responses
* feat: add volta configuration for node and pnpm versions
* refactor: update LLM response handling and event structure in tool choice logic
* feat: update LLM response structure and integrate with tool choice logic
* refactor: clean up imports and remove unused streamResponse function in chat and toolChoice modules
* refactor: rename answer variable to answerBuffer for clarity in LLM response handling
* feat: enhance LLM response handling with tool options and integrate tools into chat and tool choice logic
* refactor: remove volta configuration from package.json
* refactor: reorganize LLM response types and ensure default values for token counts
* refactor: streamline LLM response handling by consolidating response structure and removing redundant checks
* refactor: enhance LLM response handling by consolidating tool options and streamlining event callbacks
* fix: build error
* refactor: update tool type definitions for consistency in tool handling
* feat: llm request function
* fix: ts
* fix: ts
* fix: ahook ts
* fix: variable name
* update lock
* ts version
* doc
* remove log
* fix: translation type
* perf: workflow status check
* fix: ts
* fix: prompt tool call
* fix: fix missing plugin interact window & make tag draggable (#5527)
* fix: incorrect select quick apps state; filter apps type (#5528)
* fix: usesafe translation
* perf: add quickapp modal

Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com>
Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: francis <zhichengfan18@gmail.com>
@@ -1,16 +1,5 @@
import OpenAI from '@fastgpt/global/core/ai';
import type {
  ChatCompletionCreateParamsNonStreaming,
  ChatCompletionCreateParamsStreaming,
  StreamChatType,
  UnStreamChatType
} from '@fastgpt/global/core/ai/type';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { addLog } from '../../common/system/log';
import { i18nT } from '../../../web/i18n/utils';
import { type OpenaiAccountType } from '@fastgpt/global/support/user/team/type';
import { getLLMModel } from './model';
import { type LLMModelItemType } from '@fastgpt/global/core/ai/model.d';

const aiProxyBaseUrl = process.env.AIPROXY_API_ENDPOINT
  ? `${process.env.AIPROXY_API_ENDPOINT}/v1`

@@ -43,100 +32,3 @@ export const getAxiosConfig = (props?: { userKey?: OpenaiAccountType }) => {
    authorization: `Bearer ${apiKey}`
  };
};

export const createChatCompletion = async ({
  modelData,
  body,
  userKey,
  timeout,
  options
}: {
  modelData?: LLMModelItemType;
  body: ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;
  userKey?: OpenaiAccountType;
  timeout?: number;
  options?: OpenAI.RequestOptions;
}): Promise<
  {
    getEmptyResponseTip: () => string;
  } & (
    | {
        response: StreamChatType;
        isStreamResponse: true;
      }
    | {
        response: UnStreamChatType;
        isStreamResponse: false;
      }
  )
> => {
  try {
    // Rewrite model
    const modelConstantsData = modelData || getLLMModel(body.model);
    if (!modelConstantsData) {
      return Promise.reject(`${body.model} not found`);
    }
    body.model = modelConstantsData.model;

    const formatTimeout = timeout ? timeout : 600000;
    const ai = getAIApi({
      userKey,
      timeout: formatTimeout
    });

    addLog.debug(`Start create chat completion`, {
      model: body.model
    });

    const response = await ai.chat.completions.create(body, {
      ...options,
      ...(modelConstantsData.requestUrl ? { path: modelConstantsData.requestUrl } : {}),
      headers: {
        ...options?.headers,
        ...(modelConstantsData.requestAuth
          ? { Authorization: `Bearer ${modelConstantsData.requestAuth}` }
          : {})
      }
    });

    const isStreamResponse =
      typeof response === 'object' &&
      response !== null &&
      ('iterator' in response || 'controller' in response);

    const getEmptyResponseTip = () => {
      addLog.warn(`LLM response empty`, {
        baseUrl: userKey?.baseUrl,
        requestBody: body
      });
      if (userKey?.baseUrl) {
        return `您的 OpenAI key 没有响应: ${JSON.stringify(body)}`;
      }
      return i18nT('chat:LLM_model_response_empty');
    };

    if (isStreamResponse) {
      return {
        response,
        isStreamResponse: true,
        getEmptyResponseTip
      };
    }

    return {
      response,
      isStreamResponse: false,
      getEmptyResponseTip
    };
  } catch (error) {
    addLog.error(`LLM response error`, error);
    addLog.warn(`LLM response error`, {
      baseUrl: userKey?.baseUrl,
      requestBody: body
    });
    if (userKey?.baseUrl) {
      return Promise.reject(`您的 OpenAI key 出错了: ${getErrText(error)}`);
    }
    return Promise.reject(error);
  }
};

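A minimal usage sketch (editorial, not part of the commit; the model name and messages are assumed) showing how callers narrow the discriminated union this function returns:

const { response, isStreamResponse, getEmptyResponseTip } = await createChatCompletion({
  body: { model: 'gpt-4o-mini', stream: true, messages: [{ role: 'user', content: 'hi' }] }
});
if (isStreamResponse) {
  // response: StreamChatType, an async iterable of stream chunks
  for await (const part of response) {
    process.stdout.write(part.choices?.[0]?.delta?.content || '');
  }
} else {
  // response: UnStreamChatType, a complete ChatCompletion
  console.log(response.choices?.[0]?.message?.content || getEmptyResponseTip());
}
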
@@ -19,7 +19,7 @@ import { delay } from '@fastgpt/global/common/system/utils';
import { pluginClient } from '../../../thirdProvider/fastgptPlugin';
import { setCron } from '../../../common/system/cron';

-export const loadSystemModels = async (init = false) => {
+export const loadSystemModels = async (init = false, language = 'en') => {
  const pushModel = (model: SystemModelItemType) => {
    global.systemModelList.push(model);

@@ -113,7 +113,10 @@ export const loadSystemModels = async (init = false) => {
      const modelData: any = {
        ...model,
        ...dbModel?.metadata,
-       provider: getModelProvider(dbModel?.metadata?.provider || (model.provider as any)).id,
+       provider: getModelProvider(
+         dbModel?.metadata?.provider || (model.provider as any),
+         language
+       ).id,
        type: dbModel?.metadata?.type || model.type,
        isCustom: false,

@@ -169,8 +172,8 @@ export const loadSystemModels = async (init = false) => {

  // Sort model list
  global.systemActiveModelList.sort((a, b) => {
-   const providerA = getModelProvider(a.provider);
-   const providerB = getModelProvider(b.provider);
+   const providerA = getModelProvider(a.provider, language);
+   const providerB = getModelProvider(b.provider, language);
    return providerA.order - providerB.order;
  });
  global.systemActiveDesensitizedModels = global.systemActiveModelList.map((model) => ({

@@ -1,14 +1,11 @@
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d';
-import { createChatCompletion } from '../config';
-import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
-import { loadRequestMessages } from '../../chat/utils';
-import { llmCompletionsBodyFormat, formatLLMResponse } from '../utils';
import {
  QuestionGuidePrompt,
  QuestionGuideFooterPrompt
} from '@fastgpt/global/core/ai/prompt/agent';
import { addLog } from '../../../common/system/log';
import json5 from 'json5';
+import { createLLMResponse } from '../llm/request';

export async function createQuestionGuide({
  messages,
@@ -30,31 +27,23 @@ export async function createQuestionGuide({
      content: `${customPrompt || QuestionGuidePrompt}\n${QuestionGuideFooterPrompt}`
    }
  ];
-  const requestMessages = await loadRequestMessages({
-    messages: concatMessages,
-    useVision: false
-  });
-
-  const { response } = await createChatCompletion({
-    body: llmCompletionsBodyFormat(
-      {
-        model,
-        temperature: 0.1,
-        max_tokens: 200,
-        messages: requestMessages,
-        stream: true
-      },
-      model
-    )
+  const {
+    answerText: answer,
+    usage: { inputTokens, outputTokens }
+  } = await createLLMResponse({
+    body: {
+      model,
+      temperature: 0.1,
+      max_tokens: 200,
+      messages: concatMessages,
+      stream: true
+    }
  });
-  const { text: answer, usage } = await formatLLMResponse(response);

  const start = answer.indexOf('[');
  const end = answer.lastIndexOf(']');

-  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(requestMessages));
-  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));
-
  if (start === -1 || end === -1) {
    addLog.warn('Create question guide error', { answer });
    return {

@@ -1,13 +1,11 @@
import { replaceVariable } from '@fastgpt/global/common/string/tools';
-import { createChatCompletion } from '../config';
import { type ChatItemType } from '@fastgpt/global/core/chat/type';
-import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
import { getLLMModel } from '../model';
-import { llmCompletionsBodyFormat, formatLLMResponse } from '../utils';
import { addLog } from '../../../common/system/log';
-import { filterGPTMessageByMaxContext } from '../../chat/utils';
+import { filterGPTMessageByMaxContext } from '../llm/utils';
import json5 from 'json5';
+import { createLLMResponse } from '../llm/request';

/*
  query extension (question expansion)
@@ -167,20 +165,17 @@ assistant: ${chatBg}
    }
  ] as any;

-  const { response } = await createChatCompletion({
-    body: llmCompletionsBodyFormat(
-      {
-        stream: true,
-        model: modelData.model,
-        temperature: 0.1,
-        messages
-      },
-      modelData
-    )
+  const {
+    answerText: answer,
+    usage: { inputTokens, outputTokens }
+  } = await createLLMResponse({
+    body: {
+      stream: true,
+      model: modelData.model,
+      temperature: 0.1,
+      messages
+    }
  });
-  const { text: answer, usage } = await formatLLMResponse(response);
-  const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(messages));
-  const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));

  if (!answer) {
    return {

@@ -0,0 +1,41 @@
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import type { ChatCompletionTool } from '@fastgpt/global/core/ai/type';

export const getPromptToolCallPrompt = (tools: ChatCompletionTool['function'][]) => {
  const prompt = `<ToolSkill>
你是一个智能机器人,除了可以回答用户问题外,你还掌握工具的使用能力。有时候,你可以依赖工具的运行结果,来更准确的回答用户。

工具使用了 JSON Schema 的格式声明,格式为:{name: 工具名; description: 工具描述; parameters: 工具参数},其中 name 是工具的唯一标识,parameters 包含工具的参数、类型、描述、必填项等。

请你根据工具描述,决定回答问题或是使用工具。你的每次输出都必须以0,1开头,代表是否需要调用工具:
0: 不使用工具,直接回答内容。
1: 使用工具,返回工具调用的参数。

## 回答示例

- 0: 你好,有什么可以帮助你的么?
- 1: ${JSON.stringify({ name: 'searchToolId1' })}
- 0: 现在是2022年5月5日,星期四,中午12点。
- 1: ${JSON.stringify({ name: 'searchToolId2', arguments: { city: '杭州' } })}
- 0: 今天杭州是晴天。
- 1: ${JSON.stringify({ name: 'searchToolId3', arguments: { query: '杭州 天气 去哪里玩' } })}
- 0: 今天杭州是晴天,适合去西湖、灵隐寺、千岛湖等地玩。

## 可用工具列表

"""
{{toolSchema}}
"""
</ToolSkill>
`;

  const schema = tools.map((tool) => ({
    name: tool.name,
    description: tool.description,
    parameters: tool.parameters
  }));

  return replaceVariable(prompt, {
    toolSchema: JSON.stringify(schema)
  });
};

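An illustrative sketch of how the prompt is assembled (the getWeather tool is an assumed example, not part of this commit):

const prompt = getPromptToolCallPrompt([
  {
    name: 'getWeather',
    description: 'Query the current weather for a city',
    parameters: {
      type: 'object',
      properties: { city: { type: 'string', description: 'City name' } },
      required: ['city']
    }
  }
]);
// {{toolSchema}} is replaced with the JSON-serialized schema array, so the model sees
// the tool list inside <ToolSkill> and must answer with a '0:' or '1:' prefix.
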
@@ -0,0 +1,118 @@
import { getNanoid, sliceJsonStr } from '@fastgpt/global/common/string/tools';
import json5 from 'json5';
import type {
  ChatCompletionMessageParam,
  ChatCompletionMessageToolCall,
  ChatCompletionSystemMessageParam,
  ChatCompletionTool
} from '@fastgpt/global/core/ai/type';
import { getPromptToolCallPrompt } from './prompt';
import { cloneDeep } from 'lodash';

export const promptToolCallMessageRewrite = (
  messages: ChatCompletionMessageParam[],
  tools: ChatCompletionTool[]
) => {
  const cloneMessages = cloneDeep(messages);

  // Add system prompt to messages
  let systemMessage = cloneMessages.find(
    (item) => item.role === 'system'
  ) as ChatCompletionSystemMessageParam;

  if (!systemMessage) {
    systemMessage = {
      role: 'system',
      content: ''
    };
    cloneMessages.unshift(systemMessage);
  }

  if (typeof systemMessage?.content === 'string') {
    systemMessage.content =
      `${systemMessage.content}\n\n${getPromptToolCallPrompt(tools.map((tool) => tool.function))}`.trim();
  } else if (Array.isArray(systemMessage.content)) {
    systemMessage.content.push({
      type: 'text',
      text: getPromptToolCallPrompt(tools.map((tool) => tool.function))
    });
  } else {
    throw new Error('Prompt call invalid input');
  }

  /*
    Format tool messages, rewrite assistant/tool message
    1. Assistant, no tool_calls: skip
    2. Assistant with tool_calls: rewrite to assistant text
    3. Tool: rewrite to user text
  */
  for (let i = 0; i < cloneMessages.length; i++) {
    const message = cloneMessages[i];
    if (message.role === 'assistant') {
      if (message.content && typeof message.content === 'string') {
        message.content = `0: ${message.content}`;
      } else if (message.tool_calls?.length) {
        message.content = `1: ${JSON.stringify(message.tool_calls[0].function)}`;
        delete message.tool_calls;
      }
    } else if (message.role === 'tool') {
      cloneMessages.splice(i, 1, {
        role: 'user',
        content: `<ToolResponse>\n${message.content}\n</ToolResponse>`
      });
    }
  }

  return cloneMessages;
};

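A sketch of the rewrite on a small assumed history (tools is a ChatCompletionTool[] containing the getWeather schema; both are illustrative):

const rewritten = promptToolCallMessageRewrite(
  [
    { role: 'user', content: 'What is the weather in Hangzhou?' },
    {
      role: 'assistant',
      tool_calls: [
        {
          id: 'call1',
          type: 'function',
          function: { name: 'getWeather', arguments: '{"city":"杭州"}' }
        }
      ]
    },
    { role: 'tool', tool_call_id: 'call1', content: 'Sunny, 25°C' }
  ],
  tools
);
// Result: a system message carrying the <ToolSkill> prompt, then
//   user:      'What is the weather in Hangzhou?'
//   assistant: '1: {"name":"getWeather","arguments":"{\"city\":\"杭州\"}"}'
//   user:      '<ToolResponse>\nSunny, 25°C\n</ToolResponse>'
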
const ERROR_TEXT = 'Tool run error';
export const parsePromptToolCall = (
  str: string
): {
  answer: string;
  toolCalls?: ChatCompletionMessageToolCall[];
} => {
  str = str.trim();
  // First, use a regex to detect the tool-call prefix ('1:' or fullwidth '1:')
  const prefixReg = /1(:|:)/;

  if (prefixReg.test(str)) {
    const toolString = sliceJsonStr(str);

    try {
      const toolCall = json5.parse(toolString) as { name: string; arguments: Object };

      return {
        answer: '',
        toolCalls: [
          {
            id: getNanoid(),
            type: 'function' as const,
            function: {
              name: toolCall.name,
              arguments: JSON.stringify(toolCall.arguments)
            }
          }
        ]
      };
    } catch (error) {
      if (prefixReg.test(str)) {
        return {
          answer: ERROR_TEXT
        };
      } else {
        return {
          answer: str
        };
      }
    }
  } else {
    const firstIndex = str.indexOf('0:') !== -1 ? str.indexOf('0:') : str.indexOf('0:');
    if (firstIndex > -1 && firstIndex < 6) {
      str = str.substring(firstIndex + 2).trim();
    }

    return { answer: str };
  }
};

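Expected behavior on the two prefixes, as a sketch (outputs abbreviated; ids are random nanoids):

// '0' prefix: the marker is stripped and the rest is a plain answer.
parsePromptToolCall('0: 今天杭州是晴天。');
// => { answer: '今天杭州是晴天。' }

// '1' prefix: the JSON payload becomes a toolChoice-style tool call.
parsePromptToolCall('1: {"name":"getWeather","arguments":{"city":"杭州"}}');
// => { answer: '', toolCalls: [{ id: '<nanoid>', type: 'function',
//      function: { name: 'getWeather', arguments: '{"city":"杭州"}' } }] }
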
@@ -0,0 +1,648 @@
import type {
  ChatCompletion,
  ChatCompletionCreateParamsNonStreaming,
  ChatCompletionCreateParamsStreaming,
  ChatCompletionMessageParam,
  ChatCompletionMessageToolCall,
  CompletionFinishReason,
  CompletionUsage,
  OpenAI,
  StreamChatType,
  UnStreamChatType
} from '@fastgpt/global/core/ai/type';
import { computedTemperature, parseLLMStreamResponse, parseReasoningContent } from '../utils';
import { removeDatasetCiteText } from '@fastgpt/global/core/ai/llm/utils';
import { getAIApi } from '../config';
import type { OpenaiAccountType } from '@fastgpt/global/support/user/team/type';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { parsePromptToolCall, promptToolCallMessageRewrite } from './promptToolCall';
import { getLLMModel } from '../model';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
import { loadRequestMessages } from './utils';
import { addLog } from '../../../common/system/log';
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { i18nT } from '../../../../web/i18n/utils';
import { getErrText } from '@fastgpt/global/common/error/utils';
import json5 from 'json5';

type ResponseEvents = {
  onStreaming?: ({ text }: { text: string }) => void;
  onReasoning?: ({ text }: { text: string }) => void;
  onToolCall?: ({ call }: { call: ChatCompletionMessageToolCall }) => void;
  onToolParam?: ({ tool, params }: { tool: ChatCompletionMessageToolCall; params: string }) => void;
};

type CreateLLMResponseProps<T extends CompletionsBodyType> = {
  userKey?: OpenaiAccountType;
  body: LLMRequestBodyType<T>;
  isAborted?: () => boolean | undefined;
  customHeaders?: Record<string, string>;
} & ResponseEvents;

type LLMResponse = {
  isStreamResponse: boolean;
  answerText: string;
  reasoningText: string;
  toolCalls?: ChatCompletionMessageToolCall[];
  finish_reason: CompletionFinishReason;
  getEmptyResponseTip: () => string;
  usage: {
    inputTokens: number;
    outputTokens: number;
  };

  requestMessages: ChatCompletionMessageParam[];
  assistantMessage: ChatCompletionMessageParam[];
  completeMessages: ChatCompletionMessageParam[];
};

/*
  Low-level wrapper around LLM calls. It hides stream vs non-stream handling from callers,
  as well as the toolChoice vs promptTool calling modes.
  Whichever mode is used, tool calls are stored in the toolChoice format; promptTool mode
  rewrites that structure into purpose-built messages before the request is made.
*/
export const createLLMResponse = async <T extends CompletionsBodyType>(
  args: CreateLLMResponseProps<T>
): Promise<LLMResponse> => {
  const { body, customHeaders, userKey } = args;
  const { messages, useVision, requestOrigin, tools, toolCallMode } = body;

  const modelData = getLLMModel(body.model);

  // Messages process
  const requestMessages = await loadRequestMessages({
    messages,
    useVision,
    origin: requestOrigin
  });
  // Rewrite messages for prompt-based tool calling
  const rewriteMessages = (() => {
    if (tools?.length && toolCallMode === 'prompt') {
      return promptToolCallMessageRewrite(requestMessages, tools);
    }
    return requestMessages;
  })();

  const requestBody = await llmCompletionsBodyFormat({
    ...body,
    messages: rewriteMessages
  });

  // console.log(JSON.stringify(requestBody, null, 2));
  const { response, isStreamResponse, getEmptyResponseTip } = await createChatCompletion({
    body: requestBody,
    userKey,
    options: {
      headers: {
        Accept: 'application/json, text/plain, */*',
        ...customHeaders
      }
    }
  });

  const { answerText, reasoningText, toolCalls, finish_reason, usage } = await (async () => {
    if (isStreamResponse) {
      return createStreamResponse({
        response,
        body,
        isAborted: args.isAborted,
        onStreaming: args.onStreaming,
        onReasoning: args.onReasoning,
        onToolCall: args.onToolCall,
        onToolParam: args.onToolParam
      });
    } else {
      return createCompleteResponse({
        response,
        body,
        onStreaming: args.onStreaming,
        onReasoning: args.onReasoning,
        onToolCall: args.onToolCall
      });
    }
  })();

  const assistantMessage: ChatCompletionMessageParam[] = [
    ...(answerText || reasoningText
      ? [
          {
            role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
            content: answerText,
            reasoning_text: reasoningText
          }
        ]
      : []),
    ...(toolCalls?.length
      ? [
          {
            role: ChatCompletionRequestMessageRoleEnum.Assistant as 'assistant',
            tool_calls: toolCalls
          }
        ]
      : [])
  ];

  // Usage count
  const inputTokens =
    usage?.prompt_tokens ?? (await countGptMessagesTokens(requestBody.messages, requestBody.tools));
  const outputTokens = usage?.completion_tokens ?? (await countGptMessagesTokens(assistantMessage));

  return {
    isStreamResponse,
    getEmptyResponseTip,
    answerText,
    reasoningText,
    toolCalls,
    finish_reason,
    usage: {
      inputTokens,
      outputTokens
    },

    requestMessages,
    assistantMessage,
    completeMessages: [...requestMessages, ...assistantMessage]
  };
};

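A usage sketch (editorial; the model name, messages, and myTools are assumptions for illustration, not part of this commit):

const { answerText, toolCalls, usage, completeMessages } = await createLLMResponse({
  body: {
    model: 'gpt-4o-mini',
    stream: true,
    messages: [{ role: 'user', content: 'What is the weather in Hangzhou?' }],
    tools: myTools, // assumed ChatCompletionTool[]
    toolCallMode: 'prompt' // or 'toolChoice' for native function calling
  },
  onStreaming: ({ text }) => process.stdout.write(text),
  onToolCall: ({ call }) => console.log('tool requested:', call.function.name)
});
// usage.inputTokens / usage.outputTokens fall back to tiktoken counting when the
// provider returns no usage object; completeMessages = requestMessages + assistantMessage.
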
type CompleteParams = Pick<CreateLLMResponseProps<CompletionsBodyType>, 'body'> & ResponseEvents;

type CompleteResponse = Pick<
  LLMResponse,
  'answerText' | 'reasoningText' | 'toolCalls' | 'finish_reason'
> & {
  usage?: CompletionUsage;
};

export const createStreamResponse = async ({
  body,
  response,
  isAborted,
  onStreaming,
  onReasoning,
  onToolCall,
  onToolParam
}: CompleteParams & {
  response: StreamChatType;
  isAborted?: () => boolean | undefined;
}): Promise<CompleteResponse> => {
  const { retainDatasetCite = true, tools, toolCallMode = 'toolChoice', model } = body;
  const modelData = getLLMModel(model);

  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

  if (tools?.length) {
    if (toolCallMode === 'toolChoice') {
      let callingTool: ChatCompletionMessageToolCall['function'] | null = null;
      const toolCalls: ChatCompletionMessageToolCall[] = [];

      for await (const part of response) {
        if (isAborted?.()) {
          response.controller?.abort();
          updateFinishReason('close');
          break;
        }

        const { reasoningContent, responseContent } = parsePart({
          part,
          parseThinkTag: modelData.reasoning,
          retainDatasetCite
        });

        if (reasoningContent) {
          onReasoning?.({ text: reasoningContent });
        }
        if (responseContent) {
          onStreaming?.({ text: responseContent });
        }

        const responseChoice = part.choices?.[0]?.delta;

        // Parse tool calls
        if (responseChoice?.tool_calls?.length) {
          responseChoice.tool_calls.forEach((toolCall, i) => {
            const index = toolCall.index ?? i;

            // Call new tool
            const hasNewTool = toolCall?.function?.name || callingTool;
            if (hasNewTool) {
              // A function name marks a new tool call
              if (toolCall?.function?.name) {
                callingTool = {
                  name: toolCall.function?.name || '',
                  arguments: toolCall.function?.arguments || ''
                };
              } else if (callingTool) {
                // Continue call (perhaps the name of the previous function was incomplete)
                callingTool.name += toolCall.function?.name || '';
                callingTool.arguments += toolCall.function?.arguments || '';
              }

              // New tool, add to list.
              if (tools.find((item) => item.function.name === callingTool!.name)) {
                const call: ChatCompletionMessageToolCall = {
                  id: getNanoid(),
                  type: 'function',
                  function: callingTool!
                };
                toolCalls.push(call);
                onToolCall?.({ call });
                callingTool = null;
              }
            } else {
              // Append the streamed arg chunk to the current tool's arguments
              const arg: string = toolCall?.function?.arguments ?? '';
              const currentTool = toolCalls[index];
              if (currentTool && arg) {
                currentTool.function.arguments += arg;

                onToolParam?.({ tool: currentTool, params: arg });
              }
            }
          });
        }
      }

      const { reasoningContent, content, finish_reason, usage } = getResponseData();

      return {
        answerText: content,
        reasoningText: reasoningContent,
        finish_reason,
        usage,
        toolCalls
      };
    } else {
      let startResponseWrite = false;
      let answer = '';

      for await (const part of response) {
        if (isAborted?.()) {
          response.controller?.abort();
          updateFinishReason('close');
          break;
        }

        const { reasoningContent, content, responseContent } = parsePart({
          part,
          parseThinkTag: modelData.reasoning,
          retainDatasetCite
        });
        answer += content;

        if (reasoningContent) {
          onReasoning?.({ text: reasoningContent });
        }

        if (content) {
          if (startResponseWrite) {
            if (responseContent) {
              onStreaming?.({ text: responseContent });
            }
          } else if (answer.length >= 3) {
            answer = answer.trimStart();

            // '0' prefix: a plain answer, no tool call
            if (/0(:|:)/.test(answer)) {
              startResponseWrite = true;

              // Find the index of the first ':' (or fullwidth ':')
              const firstIndex =
                answer.indexOf('0:') !== -1 ? answer.indexOf('0:') : answer.indexOf('0:');
              answer = answer.substring(firstIndex + 2).trim();

              onStreaming?.({ text: answer });
            }
            // '1' prefix: a tool call is coming, so buffer it instead of streaming
            else if (/1(:|:)/.test(answer)) {
            }
            // No 0/1 prefix at all: treat everything as a plain answer
            else {
              startResponseWrite = true;
              onStreaming?.({ text: answer });
            }
          }
        }
      }

      const { reasoningContent, content, finish_reason, usage } = getResponseData();
      const { answer: llmAnswer, toolCalls } = parsePromptToolCall(content);

      toolCalls?.forEach((call) => {
        onToolCall?.({ call });
      });

      return {
        answerText: llmAnswer,
        reasoningText: reasoningContent,
        finish_reason,
        usage,
        toolCalls
      };
    }
  } else {
    // No tools: plain pass-through streaming
    for await (const part of response) {
      if (isAborted?.()) {
        response.controller?.abort();
        updateFinishReason('close');
        break;
      }

      const { reasoningContent, responseContent } = parsePart({
        part,
        parseThinkTag: modelData.reasoning,
        retainDatasetCite
      });

      if (reasoningContent) {
        onReasoning?.({ text: reasoningContent });
      }
      if (responseContent) {
        onStreaming?.({ text: responseContent });
      }
    }

    const { reasoningContent, content, finish_reason, usage } = getResponseData();

    return {
      answerText: content,
      reasoningText: reasoningContent,
      finish_reason,
      usage
    };
  }
};

export const createCompleteResponse = async ({
  body,
  response,
  onStreaming,
  onReasoning,
  onToolCall
}: CompleteParams & { response: ChatCompletion }): Promise<CompleteResponse> => {
  const { tools, toolCallMode = 'toolChoice', retainDatasetCite = true } = body;
  const modelData = getLLMModel(body.model);

  const finish_reason = response.choices?.[0]?.finish_reason as CompletionFinishReason;
  const usage = response.usage;

  // Content and think parse
  const { content, reasoningContent } = (() => {
    const content = response.choices?.[0]?.message?.content || '';
    const reasoningContent: string =
      (response.choices?.[0]?.message as any)?.reasoning_content || '';

    // The API has already parsed the reasoning content
    if (reasoningContent || !modelData.reasoning) {
      return {
        content,
        reasoningContent
      };
    }

    const [think, answer] = parseReasoningContent(content);
    return {
      content: answer,
      reasoningContent: think
    };
  })();
  const formatReasonContent = removeDatasetCiteText(reasoningContent, retainDatasetCite);
  let formatContent = removeDatasetCiteText(content, retainDatasetCite);

  // Tool parse
  const { toolCalls } = (() => {
    if (tools?.length) {
      if (toolCallMode === 'toolChoice') {
        return {
          toolCalls: response.choices?.[0]?.message?.tool_calls || []
        };
      }

      // Prompt call
      const { answer, toolCalls } = parsePromptToolCall(formatContent);
      formatContent = answer;

      return {
        toolCalls
      };
    }

    return {
      toolCalls: undefined
    };
  })();

  // Event response
  if (formatReasonContent) {
    onReasoning?.({ text: formatReasonContent });
  }
  if (formatContent) {
    onStreaming?.({ text: formatContent });
  }
  if (toolCalls?.length && onToolCall) {
    toolCalls.forEach((call) => {
      onToolCall({ call });
    });
  }

  return {
    reasoningText: formatReasonContent,
    answerText: formatContent,
    toolCalls,
    finish_reason,
    usage
  };
};

type CompletionsBodyType =
  | ChatCompletionCreateParamsNonStreaming
  | ChatCompletionCreateParamsStreaming;
type InferCompletionsBody<T> = T extends { stream: true }
  ? ChatCompletionCreateParamsStreaming
  : T extends { stream: false }
    ? ChatCompletionCreateParamsNonStreaming
    : ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;

type LLMRequestBodyType<T> = Omit<T, 'model' | 'stop' | 'response_format' | 'messages'> & {
  model: string | LLMModelItemType;
  stop?: string;
  response_format?: {
    type?: string;
    json_schema?: string;
  };
  messages: ChatCompletionMessageParam[];

  // Custom fields
  retainDatasetCite?: boolean;
  reasoning?: boolean; // Whether to return reasoning content
  toolCallMode?: 'toolChoice' | 'prompt';
  useVision?: boolean;
  requestOrigin?: string;
};
const llmCompletionsBodyFormat = async <T extends CompletionsBodyType>({
  reasoning,
  retainDatasetCite,
  useVision,
  requestOrigin,

  tools,
  tool_choice,
  parallel_tool_calls,
  toolCallMode,
  ...body
}: LLMRequestBodyType<T>): Promise<InferCompletionsBody<T>> => {
  const modelData = getLLMModel(body.model);
  if (!modelData) {
    return body as unknown as InferCompletionsBody<T>;
  }

  const response_format = (() => {
    if (!body.response_format?.type) return undefined;
    if (body.response_format.type === 'json_schema') {
      try {
        return {
          type: 'json_schema',
          json_schema: json5.parse(body.response_format?.json_schema as unknown as string)
        };
      } catch (error) {
        throw new Error('Json schema error');
      }
    }
    if (body.response_format.type) {
      return {
        type: body.response_format.type
      };
    }
    return undefined;
  })();
  const stop = body.stop ?? undefined;

  const requestBody = {
    ...body,
    model: modelData.model,
    temperature:
      typeof body.temperature === 'number'
        ? computedTemperature({
            model: modelData,
            temperature: body.temperature
          })
        : undefined,
    ...modelData?.defaultConfig,
    response_format,
    stop: stop?.split('|'),
    ...(toolCallMode === 'toolChoice' && {
      tools,
      tool_choice,
      parallel_tool_calls
    })
  } as T;

  // Field map
  if (modelData.fieldMap) {
    Object.entries(modelData.fieldMap).forEach(([sourceKey, targetKey]) => {
      // @ts-ignore
      requestBody[targetKey] = body[sourceKey];
      // @ts-ignore
      delete requestBody[sourceKey];
    });
  }

  return requestBody as unknown as InferCompletionsBody<T>;
};
const createChatCompletion = async ({
  modelData,
  body,
  userKey,
  timeout,
  options
}: {
  modelData?: LLMModelItemType;
  body: ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;
  userKey?: OpenaiAccountType;
  timeout?: number;
  options?: OpenAI.RequestOptions;
}): Promise<
  {
    getEmptyResponseTip: () => string;
  } & (
    | {
        response: StreamChatType;
        isStreamResponse: true;
      }
    | {
        response: UnStreamChatType;
        isStreamResponse: false;
      }
  )
> => {
  try {
    // Rewrite model
    const modelConstantsData = modelData || getLLMModel(body.model);
    if (!modelConstantsData) {
      return Promise.reject(`${body.model} not found`);
    }
    body.model = modelConstantsData.model;

    const formatTimeout = timeout ? timeout : 600000;
    const ai = getAIApi({
      userKey,
      timeout: formatTimeout
    });

    addLog.debug(`Start create chat completion`, {
      model: body.model
    });

    const response = await ai.chat.completions.create(body, {
      ...options,
      ...(modelConstantsData.requestUrl ? { path: modelConstantsData.requestUrl } : {}),
      headers: {
        ...options?.headers,
        ...(modelConstantsData.requestAuth
          ? { Authorization: `Bearer ${modelConstantsData.requestAuth}` }
          : {})
      }
    });

    const isStreamResponse =
      typeof response === 'object' &&
      response !== null &&
      ('iterator' in response || 'controller' in response);

    const getEmptyResponseTip = () => {
      addLog.warn(`LLM response empty`, {
        baseUrl: userKey?.baseUrl,
        requestBody: body
      });
      if (userKey?.baseUrl) {
        return `您的 OpenAI key 没有响应: ${JSON.stringify(body)}`;
      }
      return i18nT('chat:LLM_model_response_empty');
    };

    if (isStreamResponse) {
      return {
        response,
        isStreamResponse: true,
        getEmptyResponseTip
      };
    }

    return {
      response,
      isStreamResponse: false,
      getEmptyResponseTip
    };
  } catch (error) {
    addLog.error(`LLM response error`, error);
    addLog.warn(`LLM response error`, {
      baseUrl: userKey?.baseUrl,
      requestBody: body
    });
    if (userKey?.baseUrl) {
      return Promise.reject(`您的 OpenAI key 出错了: ${getErrText(error)}`);
    }
    return Promise.reject(error);
  }
};

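To illustrate the fieldMap and stop handling in llmCompletionsBodyFormat above, a sketch under an assumed model config (the o4-mini entry and its fieldMap are invented; the function is module-private, so this would run inside this file):

// Assumed model config: { fieldMap: { max_tokens: 'max_completion_tokens' }, ... }
const requestBody = await llmCompletionsBodyFormat({
  model: 'o4-mini',
  stream: false,
  max_tokens: 1024,
  stop: 'END|STOP', // split on '|' into ['END', 'STOP']
  messages: [{ role: 'user', content: 'hello' }]
});
// requestBody.max_completion_tokens === 1024 and requestBody.max_tokens is deleted;
// tools/tool_choice/parallel_tool_calls are only attached when toolCallMode === 'toolChoice'.
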
@@ -0,0 +1,406 @@
import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
import type {
  ChatCompletionAssistantMessageParam,
  ChatCompletionContentPart,
  ChatCompletionContentPartRefusal,
  ChatCompletionContentPartText,
  ChatCompletionMessageParam,
  SdkChatCompletionMessageParam
} from '@fastgpt/global/core/ai/type.d';
import axios from 'axios';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import { i18nT } from '../../../../web/i18n/utils';
import { addLog } from '../../../common/system/log';
import { getImageBase64 } from '../../../common/file/image/utils';

export const filterGPTMessageByMaxContext = async ({
  messages = [],
  maxContext
}: {
  messages: ChatCompletionMessageParam[];
  maxContext: number;
}) => {
  if (!Array.isArray(messages)) {
    return [];
  }

  // With only a few messages, no token counting is required
  if (messages.length < 4) {
    return messages;
  }

  // Split off the leading system prompts
  const chatStartIndex = messages.findIndex(
    (item) => item.role !== ChatCompletionRequestMessageRoleEnum.System
  );
  const systemPrompts: ChatCompletionMessageParam[] = messages.slice(0, chatStartIndex);
  const chatPrompts: ChatCompletionMessageParam[] = messages.slice(chatStartIndex);

  if (chatPrompts.length === 0) {
    return systemPrompts;
  }

  // Reduce the budget by the system prompt tokens
  maxContext -= await countGptMessagesTokens(systemPrompts);

  /* Keep each conversation round intact while truncating, e.g.:
    1. user - assistant - user
    2. user - assistant - tool
    3. user - assistant - tool - tool - tool
    4. user - assistant - tool - assistant - tool
    5. user - assistant - assistant - tool - tool
  */
  // Always keep the last chat prompt (the question)
  let chats: ChatCompletionMessageParam[] = [];
  let tmpChats: ChatCompletionMessageParam[] = [];

  // Walk the conversation from the end; each user message marks one complete round
  while (chatPrompts.length > 0) {
    const lastMessage = chatPrompts.pop();
    if (!lastMessage) {
      break;
    }

    // A user message completes a round, so decide whether to keep it
    if (lastMessage.role === ChatCompletionRequestMessageRoleEnum.User) {
      const tokens = await countGptMessagesTokens([lastMessage, ...tmpChats]);
      maxContext -= tokens;
      // This round exceeds the budget, so drop it - but always keep at least one round
      if (maxContext < 0 && chats.length > 0) {
        break;
      }

      chats = [lastMessage, ...tmpChats].concat(chats);
      tmpChats = [];
    } else {
      tmpChats.unshift(lastMessage);
    }
  }

  return [...systemPrompts, ...chats];
};

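A sketch of the truncation behavior (the maxContext budget and messages are invented): only whole user rounds are dropped, oldest first, and the newest round is kept even if it alone exceeds the budget.

const filtered = await filterGPTMessageByMaxContext({
  maxContext: 4000,
  messages: [
    { role: 'system', content: 'You are a helpful assistant.' }, // always kept
    { role: 'user', content: 'old question' },    // round 1: dropped first when
    { role: 'assistant', content: 'old answer' }, // the token budget runs out
    { role: 'user', content: 'latest question' }  // newest round: always kept
  ]
});
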
/*
  Format requested messages
  1. If not useVision, only retain text.
  2. Remove file_url
  3. If useVision, parse urls from the question and load images from local urls
*/
export const loadRequestMessages = async ({
  messages,
  useVision = true,
  origin
}: {
  messages: ChatCompletionMessageParam[];
  useVision?: boolean;
  origin?: string;
}) => {
  const parseSystemMessage = (
    content: string | ChatCompletionContentPartText[]
  ): string | ChatCompletionContentPartText[] | undefined => {
    if (typeof content === 'string') {
      if (!content) return;
      return content;
    }

    const arrayContent = content
      .filter((item) => item.text)
      .map((item) => item.text)
      .join('\n\n');

    return arrayContent;
  };
  // Parse user content (text and img). Store history => api messages
  const parseUserContent = async (content: string | ChatCompletionContentPart[]) => {
    // Split question text and image
    const parseStringWithImages = (input: string): ChatCompletionContentPart[] => {
      if (!useVision || input.length > 500) {
        return [{ type: 'text', text: input }];
      }

      // Regex matching image URLs
      const imageRegex =
        /(https?:\/\/[^\s/$.?#].[^\s]*\.(?:png|jpe?g|gif|webp|bmp|tiff?|svg|ico|heic|avif))/gi;

      const result: ChatCompletionContentPart[] = [];

      // Extract every unique HTTPS image URL and push it to the front of result
      const httpsImages = [...new Set(Array.from(input.matchAll(imageRegex), (m) => m[0]))];
      httpsImages.forEach((url) => {
        result.push({
          type: 'image_url',
          image_url: {
            url: url
          }
        });
      });

      // Too many images: return plain text
      if (httpsImages.length > 4) {
        return [{ type: 'text', text: input }];
      }

      // Append the original input as text
      result.push({ type: 'text', text: input });
      return result;
    };
    // Load image to base64
    const loadUserContentImage = async (content: ChatCompletionContentPart[]) => {
      return Promise.all(
        content.map(async (item) => {
          if (item.type === 'image_url') {
            // Remove url origin
            const imgUrl = (() => {
              if (origin && item.image_url.url.startsWith(origin)) {
                return item.image_url.url.replace(origin, '');
              }
              return item.image_url.url;
            })();

            // base64 image
            if (imgUrl.startsWith('data:image/')) {
              return item;
            }

            try {
              // If imgUrl is a local path, load the image from disk and set url to base64
              if (imgUrl.startsWith('/') || process.env.MULTIPLE_DATA_TO_BASE64 === 'true') {
                const { completeBase64: base64 } = await getImageBase64(imgUrl);

                return {
                  ...item,
                  image_url: {
                    ...item.image_url,
                    url: base64
                  }
                };
              }

              // Check whether the image is reachable; if not, filter it out
              const response = await axios.head(imgUrl, {
                timeout: 10000
              });
              if (response.status < 200 || response.status >= 400) {
                addLog.info(`Filter invalid image: ${imgUrl}`);
                return;
              }
            } catch (error: any) {
              if (error?.response?.status === 405) {
                return item;
              }
              addLog.warn(`Filter invalid image: ${imgUrl}`, { error });
              return;
            }
          }
          return item;
        })
      ).then((res) => res.filter(Boolean) as ChatCompletionContentPart[]);
    };

    if (content === undefined) return;
    if (typeof content === 'string') {
      if (content === '') return;

      const loadImageContent = await loadUserContentImage(parseStringWithImages(content));
      if (loadImageContent.length === 0) return;
      return loadImageContent;
    }

    const result = (
      await Promise.all(
        content.map(async (item) => {
          if (item.type === 'text') {
            // Array content: no need to parse images out of the text
            if (item.text) return item;
            return;
          }
          if (item.type === 'file_url') return; // LLMs do not support file_url
          if (item.type === 'image_url') {
            // Vision disabled: remove image_url
            if (!useVision) return;
            // Remove empty image_url
            if (!item.image_url.url) return;
          }

          return item;
        })
      )
    )
      .flat()
      .filter(Boolean) as ChatCompletionContentPart[];

    const loadImageContent = await loadUserContentImage(result);

    if (loadImageContent.length === 0) return;
    return loadImageContent;
  };

  const formatAssistantItem = (item: ChatCompletionAssistantMessageParam) => {
    return {
      role: item.role,
      content: item.content,
      function_call: item.function_call,
      name: item.name,
      refusal: item.refusal,
      tool_calls: item.tool_calls
    };
  };
  const parseAssistantContent = (
    content:
      | string
      | (ChatCompletionContentPartText | ChatCompletionContentPartRefusal)[]
      | null
      | undefined
  ) => {
    if (typeof content === 'string') {
      return content || '';
    }
    // Interactive node (no content)
    if (!content) return '';

    const result = content.filter((item) => item?.type === 'text');
    if (result.length === 0) return '';

    return result.map((item) => item.text).join('\n');
  };

  if (messages.length === 0) {
    return Promise.reject(i18nT('common:core.chat.error.Messages empty'));
  }

  // Merge adjacent messages with the same role into one message whose content becomes
  // an array. Assistant tool calls are never merged.
  const mergeMessages = ((messages: ChatCompletionMessageParam[]): ChatCompletionMessageParam[] => {
    return messages.reduce((mergedMessages: ChatCompletionMessageParam[], currentMessage) => {
      const lastMessage = mergedMessages[mergedMessages.length - 1];

      if (!lastMessage) {
        return [currentMessage];
      }

      if (
        lastMessage.role === ChatCompletionRequestMessageRoleEnum.System &&
        currentMessage.role === ChatCompletionRequestMessageRoleEnum.System
      ) {
        const lastContent: ChatCompletionContentPartText[] = Array.isArray(lastMessage.content)
          ? lastMessage.content
          : [{ type: 'text', text: lastMessage.content || '' }];
        const currentContent: ChatCompletionContentPartText[] = Array.isArray(
          currentMessage.content
        )
          ? currentMessage.content
          : [{ type: 'text', text: currentMessage.content || '' }];
        lastMessage.content = [...lastContent, ...currentContent];
      } // Handle user messages
      else if (
        lastMessage.role === ChatCompletionRequestMessageRoleEnum.User &&
        currentMessage.role === ChatCompletionRequestMessageRoleEnum.User
      ) {
        const lastContent: ChatCompletionContentPart[] = Array.isArray(lastMessage.content)
          ? lastMessage.content
          : [{ type: 'text', text: lastMessage.content }];
        const currentContent: ChatCompletionContentPart[] = Array.isArray(currentMessage.content)
          ? currentMessage.content
          : [{ type: 'text', text: currentMessage.content }];
        lastMessage.content = [...lastContent, ...currentContent];
      } else if (
        lastMessage.role === ChatCompletionRequestMessageRoleEnum.Assistant &&
        currentMessage.role === ChatCompletionRequestMessageRoleEnum.Assistant
      ) {
        // Both sides have non-empty content, or are interactive nodes
        if (
          (typeof lastMessage.content === 'string' ||
            Array.isArray(lastMessage.content) ||
            lastMessage.interactive) &&
          (typeof currentMessage.content === 'string' ||
            Array.isArray(currentMessage.content) ||
            currentMessage.interactive)
        ) {
          const lastContent: (ChatCompletionContentPartText | ChatCompletionContentPartRefusal)[] =
            Array.isArray(lastMessage.content)
              ? lastMessage.content
              : [{ type: 'text', text: lastMessage.content || '' }];
          const currentContent: (
            | ChatCompletionContentPartText
            | ChatCompletionContentPartRefusal
          )[] = Array.isArray(currentMessage.content)
            ? currentMessage.content
            : [{ type: 'text', text: currentMessage.content || '' }];

          lastMessage.content = [...lastContent, ...currentContent];
        } else {
          // One of them has no content, so this is not continuous text output
          mergedMessages.push(currentMessage);
        }
      } else {
        mergedMessages.push(currentMessage);
      }

      return mergedMessages;
    }, []);
  })(messages);

  const loadMessages = (
    await Promise.all(
      mergeMessages.map(async (item, i) => {
        if (item.role === ChatCompletionRequestMessageRoleEnum.System) {
          const content = parseSystemMessage(item.content);
          if (!content) return;
          return {
            ...item,
            content
          };
        } else if (item.role === ChatCompletionRequestMessageRoleEnum.User) {
          const content = await parseUserContent(item.content);
          if (!content) {
            return {
              ...item,
              content: 'null'
            };
          }

          const formatContent = (() => {
            if (Array.isArray(content) && content.length === 1 && content[0].type === 'text') {
              return content[0].text;
            }
            return content;
          })();

          return {
            ...item,
            content: formatContent
          };
        } else if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) {
          if (item.tool_calls || item.function_call) {
            return formatAssistantItem(item);
          }

          const parseContent = parseAssistantContent(item.content);

          // If the content is empty and the adjacent messages are not assistant messages,
          // fill it with 'null' so the user-assistant alternation is not lost
          const formatContent = (() => {
            const lastItem = mergeMessages[i - 1];
            const nextItem = mergeMessages[i + 1];
            if (
              parseContent === '' &&
              (lastItem?.role === ChatCompletionRequestMessageRoleEnum.Assistant ||
                nextItem?.role === ChatCompletionRequestMessageRoleEnum.Assistant)
            ) {
              return;
            }
            return parseContent || 'null';
          })();
          if (!formatContent) return;

          return {
            ...formatAssistantItem(item),
            content: formatContent
          };
        } else {
          return item;
        }
      })
    )
  ).filter(Boolean) as ChatCompletionMessageParam[];

  return loadMessages as SdkChatCompletionMessageParam[];
};

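A sketch of the vision path (the URL is invented and assumed to answer a HEAD request; with useVision: false, a string over 500 chars, or more than 4 images, the content stays plain text):

const loaded = await loadRequestMessages({
  useVision: true,
  messages: [{ role: 'user', content: 'Describe https://example.com/cat.png please' }]
});
// => [{ role: 'user', content: [
//      { type: 'image_url', image_url: { url: 'https://example.com/cat.png' } },
//      { type: 'text', text: 'Describe https://example.com/cat.png please' }
//    ] }]
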
@@ -1,10 +1,12 @@
import { cloneDeep } from 'lodash';
import { type SystemModelItemType } from './type';
+import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';

export const getDefaultLLMModel = () => global?.systemDefaultModel.llm!;
-export const getLLMModel = (model?: string) => {
+export const getLLMModel = (model?: string | LLMModelItemType) => {
  if (!model) return getDefaultLLMModel();
-  return global.llmModelMap.get(model) || getDefaultLLMModel();
+  return typeof model === 'string' ? global.llmModelMap.get(model) || getDefaultLLMModel() : model;
};

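The widened signature lets callers pass either a model id or an already-resolved model item (a sketch; the id is assumed to be registered):

const byId = getLLMModel('gpt-4o-mini'); // looked up in global.llmModelMap
const passthrough = getLLMModel(byId);   // an LLMModelItemType is returned unchanged
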
export const getDatasetModel = (model?: string) => {

@@ -1,17 +1,7 @@
-import { type LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
-import type {
-  ChatCompletionCreateParamsNonStreaming,
-  ChatCompletionCreateParamsStreaming,
-  CompletionFinishReason,
-  StreamChatType,
-  UnStreamChatType,
-  CompletionUsage,
-  ChatCompletionMessageToolCall
-} from '@fastgpt/global/core/ai/type';
-import { getLLMModel } from './model';
+import type { CompletionFinishReason, CompletionUsage } from '@fastgpt/global/core/ai/type';
import { getLLMDefaultUsage } from '@fastgpt/global/core/ai/constants';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import json5 from 'json5';
import { removeDatasetCiteText } from '@fastgpt/global/core/ai/llm/utils';

/*
  Count response max token
@@ -46,168 +36,7 @@ export const computedTemperature = ({
  return temperature;
};

type CompletionsBodyType =
  | ChatCompletionCreateParamsNonStreaming
  | ChatCompletionCreateParamsStreaming;
type InferCompletionsBody<T> = T extends { stream: true }
  ? ChatCompletionCreateParamsStreaming
  : T extends { stream: false }
    ? ChatCompletionCreateParamsNonStreaming
    : ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;

export const llmCompletionsBodyFormat = <T extends CompletionsBodyType>(
  body: T & {
    stop?: string;
  },
  model: string | LLMModelItemType
): InferCompletionsBody<T> => {
  const modelData = typeof model === 'string' ? getLLMModel(model) : model;
  if (!modelData) {
    return body as unknown as InferCompletionsBody<T>;
  }

  const response_format = (() => {
    if (!body.response_format?.type) return undefined;
    if (body.response_format.type === 'json_schema') {
      try {
        return {
          type: 'json_schema',
          json_schema: json5.parse(body.response_format?.json_schema as unknown as string)
        };
      } catch (error) {
        throw new Error('Json schema error');
      }
    }
    if (body.response_format.type) {
      return {
        type: body.response_format.type
      };
    }
    return undefined;
  })();

  const stop = body.stop ?? undefined;

  const requestBody: T = {
    ...body,
    model: modelData.model,
    temperature:
      typeof body.temperature === 'number'
        ? computedTemperature({
            model: modelData,
            temperature: body.temperature
          })
        : undefined,
    ...modelData?.defaultConfig,
    response_format,
    stop: stop?.split('|')
  };

  // Field map
  if (modelData.fieldMap) {
    Object.entries(modelData.fieldMap).forEach(([sourceKey, targetKey]) => {
      // @ts-ignore
      requestBody[targetKey] = body[sourceKey];
      // @ts-ignore
      delete requestBody[sourceKey];
    });
  }

  return requestBody as unknown as InferCompletionsBody<T>;
};

export const llmStreamResponseToAnswerText = async (
  response: StreamChatType
): Promise<{
  text: string;
  usage?: CompletionUsage;
  toolCalls?: ChatCompletionMessageToolCall[];
}> => {
  let answer = '';
  let usage = getLLMDefaultUsage();
  let toolCalls: ChatCompletionMessageToolCall[] = [];
  let callingTool: { name: string; arguments: string } | null = null;

  for await (const part of response) {
    usage = part.usage || usage;
    const responseChoice = part.choices?.[0]?.delta;

    const content = responseChoice?.content || '';
    answer += content;

    // Tool calls
    if (responseChoice?.tool_calls?.length) {
      responseChoice.tool_calls.forEach((toolCall, i) => {
        const index = toolCall.index ?? i;

        // Call new tool
        const hasNewTool = toolCall?.function?.name || callingTool;
        if (hasNewTool) {
          // A function name marks a new tool call
          if (toolCall?.function?.name) {
            callingTool = {
              name: toolCall.function?.name || '',
              arguments: toolCall.function?.arguments || ''
            };
          } else if (callingTool) {
            // Continue call (perhaps the name of the previous function was incomplete)
            callingTool.name += toolCall.function?.name || '';
            callingTool.arguments += toolCall.function?.arguments || '';
          }

          if (!callingTool) {
            return;
          }

          // New tool, add to list.
          const toolId = getNanoid();
          toolCalls[index] = {
            ...toolCall,
            id: toolId,
            type: 'function',
            function: callingTool
          };
          callingTool = null;
        } else {
          // Append the streamed arg chunk to the current tool's arguments
          const arg: string = toolCall?.function?.arguments ?? '';
          const currentTool = toolCalls[index];
          if (currentTool && arg) {
            currentTool.function.arguments += arg;
          }
        }
      });
    }
  }
  return {
    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
    usage,
    toolCalls
  };
};
export const llmUnStreamResponseToAnswerText = async (
  response: UnStreamChatType
): Promise<{
  text: string;
  toolCalls?: ChatCompletionMessageToolCall[];
  usage?: CompletionUsage;
}> => {
  const answer = response.choices?.[0]?.message?.content || '';
  const toolCalls = response.choices?.[0]?.message?.tool_calls;

  return {
    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
    usage: response.usage,
    toolCalls
  };
};
export const formatLLMResponse = async (response: StreamChatType | UnStreamChatType) => {
  if ('iterator' in response) {
    return llmStreamResponseToAnswerText(response);
  }
  return llmUnStreamResponseToAnswerText(response);
};

// LLM utils
// Parse <think></think> tags to think and answer - unstream response
export const parseReasoningContent = (text: string): [string, string] => {
  const regex = /<think>([\s\S]*?)<\/think>/;

@@ -225,14 +54,6 @@ export const parseReasoningContent = (text: string): [string, string] => {
  return [thinkContent, answerContent];
};

-export const removeDatasetCiteText = (text: string, retainDatasetCite: boolean) => {
-  return retainDatasetCite
-    ? text.replace(/[\[【]id[\]】]\(CITE\)/g, '')
-    : text
-        .replace(/[\[【]([a-f0-9]{24})[\]】](?:\([^\)]*\)?)?/g, '')
-        .replace(/[\[【]id[\]】]\(CITE\)/g, '');
-};

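Behavior of the helper being removed here (it now lives in @fastgpt/global/core/ai/llm/utils, per the import at the top of this file); the 24-hex id below is invented:

// retainDatasetCite = true: only the literal [id](CITE) placeholder is stripped.
removeDatasetCiteText('See [id](CITE) for details', true);
// => 'See  for details'

// retainDatasetCite = false: real 24-hex dataset cites are stripped as well.
removeDatasetCiteText('Answer [665b1c3aa2d8f1e4b0a9c7d2](CITE)', false);
// => 'Answer '
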
// Parse llm stream part
export const parseLLMStreamResponse = () => {
  let isInThinkTag: boolean | undefined = undefined;

@@ -274,8 +95,8 @@ export const parseLLMStreamResponse = () => {
    retainDatasetCite?: boolean;
  }): {
    reasoningContent: string;
-   content: string;
-   responseContent: string;
+   content: string; // Raw content; dataset cites are kept
+   responseContent: string; // Streamed content; dataset cites are removed
    finishReason: CompletionFinishReason;
  } => {
    const data = (() => {