import { getErrText } from '@fastgpt/global/common/error/utils';
import { countPromptTokens } from '@/global/common/tiktoken';

/**
 * Replace every `{{key}}` placeholder in `text` with the matching value from `obj`.
 *
 * Only string and number values are substituted; entries of any other type are
 * skipped and their placeholders are left untouched.
 *
 * Keys and values are both treated literally: regex metacharacters in a key do
 * not change what is matched, and `$`-sequences in a value (`$&`, `$1`, `$$`)
 * are inserted verbatim instead of being expanded as replacement patterns.
 *
 * @param text - Template text containing `{{key}}` placeholders.
 * @param obj - Map of placeholder name to replacement value.
 * @returns The substituted text, or `''` when the result is empty/falsy.
 */
export function replaceVariable(text: string, obj: Record<string, unknown>): string {
  let result = text || '';

  for (const key in obj) {
    const val = obj[key];
    if (typeof val !== 'string' && typeof val !== 'number') continue;

    // split/join is a purely literal "replace all": no regex is built from the
    // key and no replacement-pattern expansion is applied to the value.
    result = result.split(`{{${key}}}`).join(String(val));
  }

  return result;
}

/**
 * Split text into overlapping chunks.
 *
 * The text is split on progressively finer delimiters (blank line, newline,
 * sentence end, `!`/`?`, `;`, `,` and their full-width forms). Delimiters stay
 * attached to the end of the piece they terminate. Pieces are accumulated until
 * a chunk reaches `maxLen`; the pieces that pushed the chunk past
 * `maxLen - overlapLen` are repeated at the start of the next chunk as overlap.
 * A piece that is still longer than `maxLen` is split again with the next
 * delimiter, and finally with fixed-size overlapping windows.
 *
 * `maxLen` is a target size, not a hard cap: whole pieces are appended, so a
 * chunk can end up longer than `maxLen`.
 *
 * @param text - Text to split. Defaults to `''`.
 * @param maxLen - Target chunk length in characters; must be a number >= 1.
 *   The overlap length is derived from it as `floor(maxLen * 0.15)`.
 * @returns `chunks` in document order and `tokens`, the sum of
 *   `countPromptTokens(chunk, 'system')` over all chunks.
 * @throws Error when `maxLen` is not a number >= 1, or when splitting/token
 *   counting fails (the failure is re-thrown as `new Error(getErrText(err))`).
 */
export const splitText2Chunks = ({ text = '', maxLen }: { text: string; maxLen: number }) => {
  // Also rejects NaN. A non-positive maxLen would make the window step <= 0
  // and the fixed-window loop below would never terminate.
  if (!(maxLen >= 1)) {
    throw new Error(`splitText2Chunks: maxLen must be a number >= 1, received ${maxLen}`);
  }

  const overlapLen = Math.floor(maxLen * 0.15); // Overlap length
  const windowStep = maxLen - overlapLen; // > 0 because maxLen >= 1

  // Delimiters ordered from coarsest to finest; the index is the recursion step.
  const stepReg: RegExp[] = [
    /\n\n/g,
    /\n/g,
    /\u3002|\.\s/g, // ideographic full stop, or "." followed by whitespace
    /[!?\uFF01\uFF1F]/g, // ! ? and their full-width forms
    /[;\uFF1B]/g, // ; and its full-width form
    /[,\uFF0C]/g // , and its full-width form
  ];

  /** Cut `input` right after every delimiter match; delimiters stay attached to the left piece. */
  const splitAfterDelimiters = (input: string, delimiter: RegExp): string[] => {
    const pieces: string[] = [];
    let start = 0;
    let match: RegExpExecArray | null;

    // The regexes are shared between recursive calls, so start from a clean state.
    delimiter.lastIndex = 0;
    while ((match = delimiter.exec(input)) !== null) {
      const end = match.index + match[0].length;
      pieces.push(input.slice(start, end));
      start = end;
    }
    if (start < input.length) {
      pieces.push(input.slice(start));
    }
    return pieces;
  };

  const splitTextRecursively = ({
    text = '',
    step
  }: {
    text: string;
    step: number;
  }): string[] => {
    if (text.length <= maxLen) {
      return [text];
    }

    const reg = stepReg[step];
    if (!reg) {
      // No delimiter left: fall back to fixed-size overlapping windows.
      const windows: string[] = [];
      for (let i = 0; i < text.length; i += windowStep) {
        windows.push(text.slice(i, i + maxLen));
        // Once a window reaches the end, any further window would only be a
        // suffix of this one.
        if (i + maxLen >= text.length) break;
      }
      return windows;
    }

    const pieces = splitAfterDelimiters(text, reg);

    const chunks: string[] = [];
    let preChunk = ''; // overlap to carry into the next chunk
    let chunk = ''; // chunk currently being accumulated

    // Emit the pending chunk unless it is empty or is only the overlap that
    // already ends the previously emitted chunk, then reset the accumulators.
    const flushPending = () => {
      const last = chunks[chunks.length - 1];
      const alreadyCovered = last !== undefined && last.endsWith(chunk);
      if (chunk && !alreadyCovered) {
        chunks.push(chunk);
      }
      chunk = '';
      preChunk = '';
    };

    for (const piece of pieces) {
      let current = piece;

      // Piece over size: split it further with the next delimiter.
      if (current.length > maxLen) {
        // Emit what has been accumulated first so chunks stay in document order.
        flushPending();

        const innerChunks = splitTextRecursively({ text: current, step: step + 1 });
        if (innerChunks.length === 0) continue;

        const lastInner = innerChunks[innerChunks.length - 1] ?? '';
        if (lastInner.length <= maxLen * 0.5) {
          // The last inner chunk is too small: merge it into the next chunk.
          innerChunks.pop();
          innerChunks.forEach((inner) => chunks.push(inner));
          current = lastInner;
        } else {
          innerChunks.forEach((inner) => chunks.push(inner));
          continue;
        }
      }

      chunk += current;
      // Past the overlap threshold: this piece is repeated in the next chunk.
      if (chunk.length > maxLen - overlapLen) {
        preChunk += current;
      }
      if (chunk.length >= maxLen) {
        chunks.push(chunk);
        chunk = preChunk;
        preChunk = '';
      }
    }

    flushPending();

    return chunks;
  };

  try {
    const chunks = splitTextRecursively({ text, step: 0 });

    const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);

    return {
      chunks,
      tokens
    };
  } catch (err) {
    throw new Error(getErrText(err));
  }
};