import { LLMModelItemType } from '@fastgpt/global/core/ai/model.d'; import { ChatCompletionCreateParamsNonStreaming, ChatCompletionCreateParamsStreaming, CompletionFinishReason, StreamChatType, UnStreamChatType, CompletionUsage } from '@fastgpt/global/core/ai/type'; import { getLLMModel } from './model'; import { getLLMDefaultUsage } from '@fastgpt/global/core/ai/constants'; /* Count response max token */ export const computedMaxToken = ({ maxToken, model }: { maxToken?: number; model: LLMModelItemType; }) => { if (maxToken === undefined) return; maxToken = Math.min(maxToken, model.maxResponse); return maxToken; }; // FastGPT temperature range: [0,10], ai temperature:[0,2],{0,1]…… export const computedTemperature = ({ model, temperature }: { model: LLMModelItemType; temperature: number; }) => { if (typeof model.maxTemperature !== 'number') return undefined; temperature = +(model.maxTemperature * (temperature / 10)).toFixed(2); temperature = Math.max(temperature, 0.01); return temperature; }; type CompletionsBodyType = | ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming; type InferCompletionsBody = T extends { stream: true } ? ChatCompletionCreateParamsStreaming : T extends { stream: false } ? ChatCompletionCreateParamsNonStreaming : ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming; export const llmCompletionsBodyFormat = ( body: T & { response_format?: any; json_schema?: string; stop?: string; }, model: string | LLMModelItemType ): InferCompletionsBody => { const modelData = typeof model === 'string' ? getLLMModel(model) : model; if (!modelData) { return body as unknown as InferCompletionsBody; } const response_format = body.response_format; const json_schema = body.json_schema ?? undefined; const stop = body.stop ?? undefined; const requestBody: T = { ...body, model: modelData.model, temperature: typeof body.temperature === 'number' ? computedTemperature({ model: modelData, temperature: body.temperature }) : undefined, ...modelData?.defaultConfig, response_format: response_format ? { type: response_format, json_schema } : undefined, stop: stop?.split('|') }; // field map if (modelData.fieldMap) { Object.entries(modelData.fieldMap).forEach(([sourceKey, targetKey]) => { // @ts-ignore requestBody[targetKey] = body[sourceKey]; // @ts-ignore delete requestBody[sourceKey]; }); } return requestBody as unknown as InferCompletionsBody; }; export const llmStreamResponseToAnswerText = async ( response: StreamChatType ): Promise<{ text: string; usage?: CompletionUsage; }> => { let answer = ''; let usage = getLLMDefaultUsage(); for await (const part of response) { usage = part.usage || usage; const content = part.choices?.[0]?.delta?.content || ''; answer += content; } return { text: parseReasoningContent(answer)[1], usage }; }; export const llmUnStreamResponseToAnswerText = async ( response: UnStreamChatType ): Promise<{ text: string; usage?: CompletionUsage; }> => { const answer = response.choices?.[0]?.message?.content || ''; return { text: answer, usage: response.usage }; }; export const llmResponseToAnswerText = async (response: StreamChatType | UnStreamChatType) => { if ('iterator' in response) { return llmStreamResponseToAnswerText(response); } return llmUnStreamResponseToAnswerText(response); }; // Parse tags to think and answer - unstream response export const parseReasoningContent = (text: string): [string, string] => { const regex = /([\s\S]*?)<\/think>/; const match = text.match(regex); if (!match) { return ['', text]; } const thinkContent = match[1].trim(); // Add answer (remaining text after think tag) const answerContent = text.slice(match.index! + match[0].length); return [thinkContent, answerContent]; }; // Parse tags to think and answer - stream response export const parseReasoningStreamContent = () => { let isInThinkTag: boolean | undefined; const startTag = ''; let startTagBuffer = ''; const endTag = ''; let endTagBuffer = ''; /* parseThinkTag - 只控制是否主动解析 ,如果接口已经解析了,则不再解析。 */ const parsePart = ( part: { choices: { delta: { content?: string | null; reasoning_content?: string; }; finish_reason?: CompletionFinishReason; }[]; }, parseThinkTag = false ): { reasoningContent: string; content: string; finishReason: CompletionFinishReason; } => { const content = part.choices?.[0]?.delta?.content || ''; const finishReason = part.choices?.[0]?.finish_reason || null; // @ts-ignore const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || ''; if (reasoningContent || !parseThinkTag) { isInThinkTag = false; return { reasoningContent, content, finishReason }; } if (!content) { return { reasoningContent: '', content: '', finishReason }; } // 如果不在 think 标签中,或者有 reasoningContent(接口已解析),则返回 reasoningContent 和 content if (isInThinkTag === false) { return { reasoningContent: '', content, finishReason }; } // 检测是否为 think 标签开头的数据 if (isInThinkTag === undefined) { // Parse content think and answer startTagBuffer += content; // 太少内容时候,暂时不解析 if (startTagBuffer.length < startTag.length) { return { reasoningContent: '', content: '', finishReason }; } if (startTagBuffer.startsWith(startTag)) { isInThinkTag = true; return { reasoningContent: startTagBuffer.slice(startTag.length), content: '', finishReason }; } // 如果未命中 think 标签,则认为不在 think 标签中,返回 buffer 内容作为 content isInThinkTag = false; return { reasoningContent: '', content: startTagBuffer, finishReason }; } // 确认是 think 标签内容,开始返回 think 内容,并实时检测 /* 检测 方案。 存储所有疑似 的内容,直到检测到完整的 标签或超出 长度。 content 返回值包含以下几种情况: abc - 完全未命中尾标签 abc - 完全命中尾标签 abcabc - 完全命中尾标签 abc - 完全命中尾标签 k>abc - 命中一部分尾标签 */ // endTagBuffer 专门用来记录疑似尾标签的内容 if (endTagBuffer) { endTagBuffer += content; if (endTagBuffer.includes(endTag)) { isInThinkTag = false; const answer = endTagBuffer.slice(endTag.length); return { reasoningContent: '', content: answer, finishReason }; } else if (endTagBuffer.length >= endTag.length) { // 缓存内容超出尾标签长度,且仍未命中 ,则认为本次猜测 失败,仍处于 think 阶段。 const tmp = endTagBuffer; endTagBuffer = ''; return { reasoningContent: tmp, content: '', finishReason }; } return { reasoningContent: '', content: '', finishReason }; } else if (content.includes(endTag)) { // 返回内容,完整命中,直接结束 isInThinkTag = false; const [think, answer] = content.split(endTag); return { reasoningContent: think, content: answer, finishReason }; } else { // 无 buffer,且未命中 ,开始疑似 检测。 for (let i = 1; i < endTag.length; i++) { const partialEndTag = endTag.slice(0, i); // 命中一部分尾标签 if (content.endsWith(partialEndTag)) { const think = content.slice(0, -partialEndTag.length); endTagBuffer += partialEndTag; return { reasoningContent: think, content: '', finishReason }; } } } // 完全未命中尾标签,还是 think 阶段。 return { reasoningContent: content, content: '', finishReason }; }; const getStartTagBuffer = () => startTagBuffer; return { parsePart, getStartTagBuffer }; };