import { type LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import type {
  ChatCompletionCreateParamsNonStreaming,
  ChatCompletionCreateParamsStreaming,
  CompletionFinishReason,
  StreamChatType,
  UnStreamChatType,
  CompletionUsage,
  ChatCompletionMessageToolCall
} from '@fastgpt/global/core/ai/type';
import { getLLMModel } from './model';
import { getLLMDefaultUsage } from '@fastgpt/global/core/ai/constants';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import json5 from 'json5';

/* Clamp the requested response max token to the model limit */
export const computedMaxToken = ({
  maxToken,
  model
}: {
  maxToken?: number;
  model: LLMModelItemType;
}) => {
  if (maxToken === undefined) return;

  maxToken = Math.min(maxToken, model.maxResponse);
  return maxToken;
};

// FastGPT temperature range: [0,10]; provider temperature ranges vary: [0,2], (0,1], ...
export const computedTemperature = ({
  model,
  temperature
}: {
  model: LLMModelItemType;
  temperature: number;
}) => {
  if (typeof model.maxTemperature !== 'number') return undefined;

  temperature = +(model.maxTemperature * (temperature / 10)).toFixed(2);
  temperature = Math.max(temperature, 0.01);

  return temperature;
};

type CompletionsBodyType =
  | ChatCompletionCreateParamsNonStreaming
  | ChatCompletionCreateParamsStreaming;
type InferCompletionsBody<T> = T extends { stream: true }
  ? ChatCompletionCreateParamsStreaming
  : T extends { stream: false }
    ? ChatCompletionCreateParamsNonStreaming
    : ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming;

export const llmCompletionsBodyFormat = <T extends CompletionsBodyType>(
  body: T & {
    stop?: string;
  },
  model: string | LLMModelItemType
): InferCompletionsBody<T> => {
  const modelData = typeof model === 'string' ? getLLMModel(model) : model;
  if (!modelData) {
    return body as unknown as InferCompletionsBody<T>;
  }

  const response_format = (() => {
    if (!body.response_format?.type) return undefined;
    if (body.response_format.type === 'json_schema') {
      try {
        return {
          type: 'json_schema',
          json_schema: json5.parse(body.response_format?.json_schema as unknown as string)
        };
      } catch (error) {
        throw new Error('Json schema error');
      }
    }
    if (body.response_format.type) {
      return {
        type: body.response_format.type
      };
    }
    return undefined;
  })();
  const stop = body.stop ?? undefined;

  const requestBody: T = {
    ...body,
    model: modelData.model,
    temperature:
      typeof body.temperature === 'number'
        ? computedTemperature({
            model: modelData,
            temperature: body.temperature
          })
        : undefined,
    ...modelData?.defaultConfig,
    response_format,
    stop: stop?.split('|')
  };

  // Field map: rename request keys according to the model config
  if (modelData.fieldMap) {
    Object.entries(modelData.fieldMap).forEach(([sourceKey, targetKey]) => {
      // @ts-ignore
      requestBody[targetKey] = body[sourceKey];
      // @ts-ignore
      delete requestBody[sourceKey];
    });
  }

  return requestBody as unknown as InferCompletionsBody<T>;
};
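/*
  Usage sketch (illustrative only, not part of the original module): a FastGPT
  temperature of 7 on a model whose maxTemperature is 2 scales to 1.4, and a
  '|'-separated stop string is split into the array the API expects. The model
  id below is an assumption for the example.
*/
const exampleBuildBody = () =>
  llmCompletionsBodyFormat(
    {
      model: 'gpt-4o-mini', // hypothetical model id
      messages: [{ role: 'user', content: 'Hi' }],
      temperature: 7, // FastGPT range [0,10] -> provider range via computedTemperature
      stream: true,
      stop: 'Observation:|Final:' // split on '|' into ['Observation:', 'Final:']
    },
    'gpt-4o-mini'
  );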
export const llmStreamResponseToAnswerText = async (
  response: StreamChatType
): Promise<{
  text: string;
  usage?: CompletionUsage;
  toolCalls?: ChatCompletionMessageToolCall[];
}> => {
  let answer = '';
  let usage = getLLMDefaultUsage();
  let toolCalls: ChatCompletionMessageToolCall[] = [];
  let callingTool: { name: string; arguments: string } | null = null;

  for await (const part of response) {
    usage = part.usage || usage;

    const responseChoice = part.choices?.[0]?.delta;
    const content = responseChoice?.content || '';
    answer += content;

    // Tool calls
    if (responseChoice?.tool_calls?.length) {
      responseChoice.tool_calls.forEach((toolCall, i) => {
        const index = toolCall.index ?? i;

        if (toolCall.id || callingTool) {
          // An id marks the start of a new tool call
          if (toolCall.id) {
            callingTool = {
              name: toolCall.function?.name || '',
              arguments: toolCall.function?.arguments || ''
            };
          } else if (callingTool) {
            // Continue the call (the previous chunk's function name may be incomplete)
            callingTool.name += toolCall.function?.name || '';
            callingTool.arguments += toolCall.function?.arguments || '';
          }

          if (!callingTool) {
            return;
          }

          // New tool, add to list.
          const toolId = getNanoid();
          toolCalls[index] = {
            ...toolCall,
            id: toolId,
            type: 'function',
            function: callingTool
          };
          callingTool = null;
        } else {
          /* Append the argument fragment to the current tool's arguments */
          const arg: string = toolCall?.function?.arguments ?? '';
          const currentTool = toolCalls[index];
          if (currentTool && arg) {
            currentTool.function.arguments += arg;
          }
        }
      });
    }
  }

  return {
    text: parseReasoningContent(answer)[1],
    usage,
    toolCalls
  };
};

export const llmUnStreamResponseToAnswerText = async (
  response: UnStreamChatType
): Promise<{
  text: string;
  toolCalls?: ChatCompletionMessageToolCall[];
  usage?: CompletionUsage;
}> => {
  const answer = response.choices?.[0]?.message?.content || '';
  const toolCalls = response.choices?.[0]?.message?.tool_calls;

  return {
    text: answer,
    usage: response.usage,
    toolCalls
  };
};

export const formatLLMResponse = async (response: StreamChatType | UnStreamChatType) => {
  if ('iterator' in response) {
    return llmStreamResponseToAnswerText(response);
  }
  return llmUnStreamResponseToAnswerText(response);
};
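/*
  Usage sketch (illustrative, not part of the original module): formatLLMResponse
  accepts either response shape, so callers need not branch on `stream` themselves.
*/
const exampleFormatResponse = async (response: StreamChatType | UnStreamChatType) => {
  const { text, usage, toolCalls } = await formatLLMResponse(response);
  return {
    text,
    totalTokens: usage?.total_tokens,
    toolCallCount: toolCalls?.length ?? 0
  };
};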
// Parse <think></think> tags into think and answer - unstream response
export const parseReasoningContent = (text: string): [string, string] => {
  const regex = /<think>([\s\S]*?)<\/think>/;
  const match = text.match(regex);

  if (!match) {
    return ['', text];
  }

  const thinkContent = match[1].trim();

  // Answer is the remaining text after the think tag
  const answerContent = text.slice(match.index! + match[0].length);

  return [thinkContent, answerContent];
};

export const removeDatasetCiteText = (text: string, retainDatasetCite: boolean) => {
  return retainDatasetCite ? text : text.replace(/\[([a-f0-9]{24})\](?:\([^\)]*\)?)?/g, '');
};
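/*
  Worked example (illustrative): per the regex above, a cite marker is
  "[<24-hex-id>](<link>)". Assuming the link token is the 4-char "CITE",
  the full marker is 32 characters, which matches maxCiteBufferLength below.
  The id used here is made up.
*/
const exampleParseHelpers = () => {
  const [think, answer] = parseReasoningContent('<think>step by step</think>Paris.');
  // think === 'step by step', answer === 'Paris.'

  const cleaned = removeDatasetCiteText('Paris.[65f2a1b3c4d5e6f7a8b9c0d1](CITE)', false);
  // cleaned === 'Paris.'

  return { think, answer, cleaned };
};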
// Parse llm stream part
export const parseLLMStreamResponse = () => {
  let isInThinkTag: boolean | undefined = undefined;
  let startTagBuffer = '';
  let endTagBuffer = '';
  const thinkStartChars = '<think>';
  const thinkEndChars = '</think>';

  let citeBuffer = '';
  const maxCiteBufferLength = 32; // A full [24-hex-id](CITE) marker is 32 chars in total

  /*
    parseThinkTag - only controls whether we actively parse <think></think>;
      if the API already split out reasoning_content, we skip parsing.
    retainDatasetCite - whether dataset cite markers are kept in responseContent.
  */
  const parsePart = ({
    part,
    parseThinkTag = true,
    retainDatasetCite = true
  }: {
    part: {
      choices: {
        delta: {
          content?: string | null;
          reasoning_content?: string;
        };
        finish_reason?: CompletionFinishReason;
      }[];
    };
    parseThinkTag?: boolean;
    retainDatasetCite?: boolean;
  }): {
    reasoningContent: string;
    content: string;
    responseContent: string;
    finishReason: CompletionFinishReason;
  } => {
    const finishReason = part.choices?.[0]?.finish_reason || null;
    const content = part.choices?.[0]?.delta?.content || '';
    // @ts-ignore
    const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
    const isStreamEnd = !!finishReason;

    // Parse think
    const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } = (() => {
      // The API already parsed reasoning (or parsing is disabled): pass through as-is
      if (reasoningContent || !parseThinkTag) {
        isInThinkTag = false;
        return { reasoningContent, content };
      }

      if (!content) {
        return {
          reasoningContent: '',
          content: ''
        };
      }

      // Not inside a think tag: everything is plain content
      if (isInThinkTag === false) {
        return {
          reasoningContent: '',
          content
        };
      }

      // Detect whether the stream starts with a <think> tag
      if (isInThinkTag === undefined) {
        // Parse content think and answer
        startTagBuffer += content;

        // Not enough content to decide yet; keep buffering
        if (startTagBuffer.length < thinkStartChars.length) {
          if (isStreamEnd) {
            const tmpContent = startTagBuffer;
            startTagBuffer = '';
            return {
              reasoningContent: '',
              content: tmpContent
            };
          }
          return {
            reasoningContent: '',
            content: ''
          };
        }

        if (startTagBuffer.startsWith(thinkStartChars)) {
          isInThinkTag = true;
          return {
            reasoningContent: startTagBuffer.slice(thinkStartChars.length),
            content: ''
          };
        }

        // No <think> tag matched: treat the buffered text as plain content
        isInThinkTag = false;
        return {
          reasoningContent: '',
          content: startTagBuffer
        };
      }

      // We are inside a <think> tag: stream think content while watching for </think>
      /*
        Detecting </think>:
        Buffer any content that might be part of </think> until the full tag
        is seen or the buffer exceeds the tag length.
        The returned content can take these shapes:
          abc            - no end tag hit at all
          abc<th         - partial end tag hit
          abc</think>    - full end tag hit
          abc</think>abc - full end tag hit
          </think>abc    - full end tag hit
          k>abc          - partial end tag hit (tail of a tag split across chunks)
      */

      // endTagBuffer holds content suspected to be part of the end tag
      if (endTagBuffer) {
        endTagBuffer += content;
        if (endTagBuffer.includes(thinkEndChars)) {
          isInThinkTag = false;
          const answer = endTagBuffer.slice(thinkEndChars.length);
          return {
            reasoningContent: '',
            content: answer
          };
        } else if (endTagBuffer.length >= thinkEndChars.length) {
          // Buffer exceeds the end-tag length without matching </think>:
          // the guess failed, we are still in the think phase.
          const tmp = endTagBuffer;
          endTagBuffer = '';
          return {
            reasoningContent: tmp,
            content: ''
          };
        }
        return {
          reasoningContent: '',
          content: ''
        };
      } else if (content.includes(thinkEndChars)) {
        // Full end tag inside this chunk: split and finish the think phase
        isInThinkTag = false;
        const [think, answer] = content.split(thinkEndChars);
        return {
          reasoningContent: think,
          content: answer
        };
      } else {
        // No buffer and no full match: check for a partial </think> at the chunk's tail
        for (let i = 1; i < thinkEndChars.length; i++) {
          const partialEndTag = thinkEndChars.slice(0, i);
          // Partial end tag hit
          if (content.endsWith(partialEndTag)) {
            const think = content.slice(0, -partialEndTag.length);
            endTagBuffer += partialEndTag;
            return {
              reasoningContent: think,
              content: ''
            };
          }
        }
      }

      // No end tag at all: still in the think phase
      return {
        reasoningContent: content,
        content: ''
      };
    })();

    // Parse dataset cite
    if (retainDatasetCite) {
      return {
        reasoningContent: parsedThinkReasoningContent,
        content: parsedThinkContent,
        responseContent: parsedThinkContent,
        finishReason
      };
    }

    // Buffer strings containing '[' and only flush once maxCiteBufferLength is exceeded
    const parseCite = (text: string) => {
      // Stream ended: flush everything that is left
      if (isStreamEnd) {
        const content = citeBuffer + text;
        return {
          content: removeDatasetCiteText(content, false)
        };
      }

      // New content contains '[': start (or restart) the cite buffer there
      if (text.includes('[')) {
        const index = text.indexOf('[');
        const beforeContent = citeBuffer + text.slice(0, index);
        citeBuffer = text.slice(index);

        // beforeContent may be a plain string or one that carried an earlier '['
        return {
          content: removeDatasetCiteText(beforeContent, false)
        };
      }
      // Already buffering a suspected cite: decide whether to flush
      else if (citeBuffer) {
        citeBuffer += text;

        // Flush once the buffer reaches the full cite marker length
        if (citeBuffer.length >= maxCiteBufferLength) {
          const content = removeDatasetCiteText(citeBuffer, false);
          citeBuffer = '';
          return {
            content
          };
        } else {
          // Hold the content for now
          return { content: '' };
        }
      }

      return { content: text };
    };
    const { content: parsedCiteContent } = parseCite(parsedThinkContent);

    return {
      reasoningContent: parsedThinkReasoningContent,
      content: parsedThinkContent,
      responseContent: parsedCiteContent,
      finishReason
    };
  };

  return { parsePart };
};
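/*
  Usage sketch (illustrative, not part of the original module): driving
  parsePart from a stream loop. The client push step is a placeholder;
  substitute your own SSE or WebSocket transport.
*/
const exampleStreamLoop = async (stream: StreamChatType) => {
  const { parsePart } = parseLLMStreamResponse();
  let reasoning = '';
  let answer = '';

  for await (const part of stream) {
    const { reasoningContent, content, responseContent, finishReason } = parsePart({
      part,
      parseThinkTag: true, // split <think>...</think> out of the content stream
      retainDatasetCite: false // strip [24-hex-id](CITE) markers from responseContent
    });

    reasoning += reasoningContent;
    answer += content;
    // push `responseContent` (the cite-filtered text) to the client here (placeholder)

    if (finishReason) break;
  }

  return { reasoning, answer };
};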