diff --git a/packages/global/common/string/textSplitter.ts b/packages/global/common/string/textSplitter.ts index 12eaafa2a..b0faf70bf 100644 --- a/packages/global/common/string/textSplitter.ts +++ b/packages/global/common/string/textSplitter.ts @@ -1,24 +1,61 @@ import { getErrText } from '../error/utils'; import { replaceRegChars } from './tools'; -/** - * text split into chunks - * chunkLen - one chunk len. max: 3500 - * overlapLen - The size of the before and after Text - * chunkLen > overlapLen - * markdown - */ -export const splitText2Chunks = (props: { +export const CUSTOM_SPLIT_SIGN = '-----CUSTOM_SPLIT_SIGN-----'; + +type SplitProps = { text: string; chunkLen: number; overlapRatio?: number; customReg?: string[]; -}): { +}; + +type SplitResponse = { chunks: string[]; chars: number; - overlapRatio?: number; -} => { +}; + +// 判断字符串是否为markdown的表格形式 +const strIsMdTable = (str: string) => { + const regex = /^(\|.*\|[\r]*)$/m; + + return regex.test(str); +}; +const markdownTableSplit = (props: SplitProps): SplitResponse => { + let { text = '', chunkLen } = props; + const splitText2Lines = text.split('\n'); + const header = splitText2Lines[0]; + + const headerSize = header.split('|').length - 2; + const mdSplitString = `| ${new Array(headerSize) + .fill(0) + .map(() => '---') + .join(' | ')} |`; + + const chunks: string[] = []; + let chunk = `${header} +${mdSplitString} +`; + + for (let i = 2; i < splitText2Lines.length; i++) { + if (chunk.length + splitText2Lines[i].length > chunkLen * 1.2) { + chunks.push(chunk); + chunk = `${header} +${mdSplitString} +`; + } + chunk += `${splitText2Lines[i]}\n`; + } + + return { + chunks, + chars: chunks.reduce((sum, chunk) => sum + chunk.length, 0) + }; +}; + +const commonSplit = (props: SplitProps): SplitResponse => { let { text = '', chunkLen, overlapRatio = 0.2, customReg = [] } = props; + const splitMarker = 'SPLIT_HERE_SPLIT_HERE'; const codeBlockMarker = 'CODE_BLOCK_LINE_MARKER'; const overlapLen = Math.round(chunkLen * overlapRatio); @@ -253,3 +290,29 @@ export const splitText2Chunks = (props: { throw new Error(getErrText(err)); } }; + +/** + * text split into chunks + * chunkLen - one chunk len. max: 3500 + * overlapLen - The size of the before and after Text + * chunkLen > overlapLen + * markdown + */ +export const splitText2Chunks = (props: SplitProps): SplitResponse => { + let { text = '' } = props; + + const splitWithCustomSign = text.split(CUSTOM_SPLIT_SIGN); + + const splitResult = splitWithCustomSign.map((item) => { + if (strIsMdTable(text)) { + return markdownTableSplit(props); + } + + return commonSplit(props); + }); + + return { + chunks: splitResult.map((item) => item.chunks).flat(), + chars: splitResult.reduce((sum, item) => sum + item.chars, 0) + }; +}; diff --git a/packages/service/common/string/tiktoken/index.ts b/packages/service/common/string/tiktoken/index.ts index 1870cfe24..cf3e81307 100644 --- a/packages/service/common/string/tiktoken/index.ts +++ b/packages/service/common/string/tiktoken/index.ts @@ -57,7 +57,7 @@ export const countGptMessagesTokens = ( // 检测是否有内存泄漏 // addLog.info(`Count token time: ${Date.now() - start}, token: ${data}`); - // console.log(Object.keys(global.tiktokenWorker.callbackMap)); + // console.log(process.memoryUsage()); }; worker.postMessage({ diff --git a/packages/service/common/vectorStore/controller.d.ts b/packages/service/common/vectorStore/controller.d.ts index 311544de0..ee823786f 100644 --- a/packages/service/common/vectorStore/controller.d.ts +++ b/packages/service/common/vectorStore/controller.d.ts @@ -15,6 +15,6 @@ export type InsertVectorProps = { export type EmbeddingRecallProps = { teamId: string; datasetIds: string[]; - similarity?: number; - efSearch?: number; + // similarity?: number; + // efSearch?: number; }; diff --git a/packages/service/common/vectorStore/pg/controller.ts b/packages/service/common/vectorStore/pg/controller.ts index f6d115722..c78923642 100644 --- a/packages/service/common/vectorStore/pg/controller.ts +++ b/packages/service/common/vectorStore/pg/controller.ts @@ -129,17 +129,15 @@ export const embeddingRecall = async ( ): Promise<{ results: EmbeddingRecallItemType[]; }> => { - const { teamId, datasetIds, vectors, limit, similarity = 0, retry = 2, efSearch = 100 } = props; + const { datasetIds, vectors, limit, retry = 2 } = props; try { const results: any = await PgClient.query( `BEGIN; - SET LOCAL hnsw.ef_search = ${efSearch}; + SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100}; select id, collection_id, vector <#> '[${vectors[0]}]' AS score from ${PgDatasetTableName} - where team_id='${teamId}' - AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')}) - AND vector <#> '[${vectors[0]}]' < -${similarity} + where dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')}) order by score limit ${limit}; COMMIT;` ); diff --git a/packages/service/core/dataset/search/controller.ts b/packages/service/core/dataset/search/controller.ts index ecefb7aff..f5f83c3ac 100644 --- a/packages/service/core/dataset/search/controller.ts +++ b/packages/service/core/dataset/search/controller.ts @@ -85,8 +85,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) { teamId, datasetIds, vectors, - limit, - efSearch: global.systemEnv?.pgHNSWEfSearch + limit }); // get q and a diff --git a/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts b/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts index f09ac158a..379dfca68 100644 --- a/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts +++ b/packages/service/core/workflow/dispatch/agent/classifyQuestion.ts @@ -127,8 +127,8 @@ const completions = async ({ }); const answer = data.choices?.[0].message?.content || ''; - console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2)); - console.log(answer, '----'); + // console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2)); + // console.log(answer, '----'); const id = agents.find((item) => answer.includes(item.key))?.key || diff --git a/packages/service/worker/file/extension/csv.ts b/packages/service/worker/file/extension/csv.ts index 85d3a3c5c..47db9ebc3 100644 --- a/packages/service/worker/file/extension/csv.ts +++ b/packages/service/worker/file/extension/csv.ts @@ -10,9 +10,13 @@ export const readCsvRawText = async (params: ReadRawTextByBuffer): Promise item.map((item, i) => `${header[i]}:${item}`).join('\n')).join('\n') - : ''; + // format to md table + const formatText = `| ${header.join(' | ')} | +| ${header.map(() => '---').join(' | ')} | +${csvArr + .slice(1) + .map((row) => `| ${row.map((item) => item.replace(/\n/g, '\\n')).join(' | ')} |`) + .join('\n')}`; return { rawText, diff --git a/packages/service/worker/file/extension/xlsx.ts b/packages/service/worker/file/extension/xlsx.ts index a6105ed7b..81f8efb3f 100644 --- a/packages/service/worker/file/extension/xlsx.ts +++ b/packages/service/worker/file/extension/xlsx.ts @@ -1,3 +1,4 @@ +import { CUSTOM_SPLIT_SIGN } from '@fastgpt/global/common/string/textSplitter'; import { ReadRawTextByBuffer, ReadFileResponse } from '../type'; import xlsx from 'node-xlsx'; import Papa from 'papaparse'; @@ -18,25 +19,25 @@ export const readXlsxRawText = async ({ }); const rawText = format2Csv.map((item) => item.csvText).join('\n'); + const formatText = format2Csv .map((item) => { const csvArr = Papa.parse(item.csvText).data as string[][]; const header = csvArr[0]; - const formatText = header - ? csvArr - .map((item) => - item - .map((item, i) => (item ? `${header[i]}:${item}` : '')) - .filter(Boolean) - .join('\n') - ) - .join('\n') - : ''; + if (!header) return; - return `${item.title}\n${formatText}`; + const formatText = `| ${header.join(' | ')} | + | ${header.map(() => '---').join(' | ')} | + ${csvArr + .slice(1) + .map((row) => `| ${row.map((item) => item.replace(/\n/g, '\\n')).join(' | ')} |`) + .join('\n')}`; + + return formatText; }) - .join('\n'); + .filter(Boolean) + .join(CUSTOM_SPLIT_SIGN); return { rawText: rawText, diff --git a/packages/service/worker/file/read.ts b/packages/service/worker/file/read.ts index 02f30faa6..fb7ea3c83 100644 --- a/packages/service/worker/file/read.ts +++ b/packages/service/worker/file/read.ts @@ -67,5 +67,5 @@ parentPort?.on('message', async (props: ReadRawTextProps) => { }); } - global?.close?.(); + process.exit(); }); diff --git a/packages/service/worker/htmlStr2Md/index.ts b/packages/service/worker/htmlStr2Md/index.ts index 95944ee1b..22a998760 100644 --- a/packages/service/worker/htmlStr2Md/index.ts +++ b/packages/service/worker/htmlStr2Md/index.ts @@ -15,6 +15,5 @@ parentPort?.on('message', (params: { html: string }) => { data: error }); } - - global?.close?.(); + process.exit(); }); diff --git a/packages/service/worker/tiktoken/countGptMessagesTokens.ts b/packages/service/worker/tiktoken/countGptMessagesTokens.ts index 1e2e9c586..80b13c318 100644 --- a/packages/service/worker/tiktoken/countGptMessagesTokens.ts +++ b/packages/service/worker/tiktoken/countGptMessagesTokens.ts @@ -26,91 +26,96 @@ parentPort?.on( tools?: ChatCompletionTool[]; functionCall?: ChatCompletionCreateParams.Function[]; }) => { - const start = Date.now(); - /* count one prompt tokens */ - const countPromptTokens = ( - prompt: string | ChatCompletionContentPart[] | null | undefined = '', - role: '' | `${ChatCompletionRequestMessageRoleEnum}` = '' - ) => { - const promptText = (() => { - if (!prompt) return ''; - if (typeof prompt === 'string') return prompt; - let promptText = ''; - prompt.forEach((item) => { - if (item.type === 'text') { - promptText += item.text; - } else if (item.type === 'image_url') { - promptText += item.image_url.url; - } - }); - return promptText; - })(); - - const text = `${role}\n${promptText}`.trim(); - - try { - const encodeText = enc.encode(text); - const supplementaryToken = role ? 4 : 0; - return encodeText.length + supplementaryToken; - } catch (error) { - return text.length; - } - }; - const countToolsTokens = ( - tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[] - ) => { - if (!tools || tools.length === 0) return 0; - - const toolText = tools - ? JSON.stringify(tools) - .replace('"', '') - .replace('\n', '') - .replace(/( ){2,}/g, ' ') - : ''; - - return enc.encode(toolText).length; - }; - - const total = - messages.reduce((sum, item) => { - // Evaluates the text of toolcall and functioncall - const functionCallPrompt = (() => { - let prompt = ''; - if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) { - const toolCalls = item.tool_calls; - prompt += - toolCalls - ?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim()) - ?.join('') || ''; - - const functionCall = item.function_call; - prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim(); - } - return prompt; + try { + /* count one prompt tokens */ + const countPromptTokens = ( + prompt: string | ChatCompletionContentPart[] | null | undefined = '', + role: '' | `${ChatCompletionRequestMessageRoleEnum}` = '' + ) => { + const promptText = (() => { + if (!prompt) return ''; + if (typeof prompt === 'string') return prompt; + let promptText = ''; + prompt.forEach((item) => { + if (item.type === 'text') { + promptText += item.text; + } else if (item.type === 'image_url') { + promptText += item.image_url.url; + } + }); + return promptText; })(); - const contentPrompt = (() => { - if (!item.content) return ''; - if (typeof item.content === 'string') return item.content; - return item.content - .map((item) => { - if (item.type === 'text') return item.text; - return ''; - }) - .join(''); - })(); + const text = `${role}\n${promptText}`.trim(); - return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role); - }, 0) + - countToolsTokens(tools) + - countToolsTokens(functionCall); + try { + const encodeText = enc.encode(text); + const supplementaryToken = role ? 4 : 0; + return encodeText.length + supplementaryToken; + } catch (error) { + return text.length; + } + }; + const countToolsTokens = ( + tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[] + ) => { + if (!tools || tools.length === 0) return 0; - parentPort?.postMessage({ - id, - type: 'success', - data: total - }); + const toolText = tools + ? JSON.stringify(tools) + .replace('"', '') + .replace('\n', '') + .replace(/( ){2,}/g, ' ') + : ''; - global?.close?.(); + return enc.encode(toolText).length; + }; + + const total = + messages.reduce((sum, item) => { + // Evaluates the text of toolcall and functioncall + const functionCallPrompt = (() => { + let prompt = ''; + if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) { + const toolCalls = item.tool_calls; + prompt += + toolCalls + ?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim()) + ?.join('') || ''; + + const functionCall = item.function_call; + prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim(); + } + return prompt; + })(); + + const contentPrompt = (() => { + if (!item.content) return ''; + if (typeof item.content === 'string') return item.content; + return item.content + .map((item) => { + if (item.type === 'text') return item.text; + return ''; + }) + .join(''); + })(); + + return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role); + }, 0) + + countToolsTokens(tools) + + countToolsTokens(functionCall); + + parentPort?.postMessage({ + id, + type: 'success', + data: total + }); + } catch (error) { + parentPort?.postMessage({ + id, + type: 'success', + data: 0 + }); + } } ); diff --git a/packages/service/worker/utils.ts b/packages/service/worker/utils.ts index f9ab4be72..e0286a0a9 100644 --- a/packages/service/worker/utils.ts +++ b/packages/service/worker/utils.ts @@ -25,9 +25,12 @@ export const runWorker = (name: WorkerNameEnum, params?: Record { - worker.terminate(); - reject(err); + worker.terminate(); + }); + worker.on('messageerror', (err) => { + reject(err); + worker.terminate(); }); }); }; diff --git a/projects/app/public/docs/versionIntro.md b/projects/app/public/docs/versionIntro.md index 45105750a..2fa2d1426 100644 --- a/projects/app/public/docs/versionIntro.md +++ b/projects/app/public/docs/versionIntro.md @@ -1,10 +1,32 @@ -### FastGPT V4.7.1 +### FastGPT V4.8 -1. 新增 - 语音输入完整配置。支持选择是否打开语音输入(包括分享页面),支持语音输入后自动发送,支持语音输入后自动语音播放(流式)。 -2. 新增 - Pptx 和 xlsx 文件读取。但所有文件读取都放服务端,会消耗更多的服务器资源,以及无法在上传时预览更多内容。 -3. 新增 - 集成 Laf 云函数,可以读取 Laf 账号中的云函数作为 HTTP 模块。 -4. 修改 - csv导入模板,取消 header 校验,自动获取前两列。 -5. 修复 - 问题补全历史记录BUG -6. [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow/intro) -7. [使用文档](https://doc.fastgpt.in/docs/intro/) -8. [点击查看商业版](https://doc.fastgpt.in/docs/commercial/) \ No newline at end of file +本次更新的重点是对工作流 (高级编排) 进行了重构,使其更加简洁和强大。但由于新旧工作流机制有较大变化,尽管我们进行了一定的自动转换,仍有部分工作流需要您手动重建。请尽快更新到新版本,并对工作流进行必要的调试和重新发布。 + +❗ 重要提示: +1️⃣ 旧工作流更新后暂不失效,打开旧工作流会弹出自动转换提示,重新编排后点 “发布” 按钮发布新工作流 +2️⃣ 发布新工作流前,工作流自动保存功能暂不生效 +3️⃣ 应用和插件新增 version 字段,标识适用新/旧版工作流,以实现兼容 + +✨ 新增功能亮点: +1️⃣ 判断器:支持 if/elseIf/else 判断逻辑,工作流控制更灵活 +2️⃣ 变量更新节点:运行中可动态修改工作流输出变量或全局变量值 +3️⃣ 工作流自动保存和版本管理:自动保存修改,支持查看和回滚历史版本 +4️⃣ 工作流调试模式:更直观高效,可调试单节点或逐步执行,实时查看输入输出数据 +5️⃣ 定时执行应用:支持简单配置实现各种定时任务 + +🛠️ 其他优化与修复: +- 优化工作流节点连线方式,支持四向连接,易构建循环工作流 +- 显著提升工作流上下文数据传递性能 +- 简易模式下修改配置自动刷新调试框,免手动保存 +- 改进 worker 进程管理,支持 Token 计算任务分配,提高效率 +- 工具调用支持 string、boolean、number 数据类型 +- 完善 completions 接口对 size 参数限制 +- 重构 Node.js API 中间件和服务端代码 +- 对话记录长度调整为偶数,最大长度增至 50 轮,避免奇数导致部分模型不兼容 +- HTTP 节点出错将终止进程,避免异常影响 +- 修复工具调用名称不能以数字开头问题 +- 修复分享链接 query 参数缓存 bug +- 修复工具调用和 HTTP 模块兼容性问题 +- [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow/intro) +- [使用文档](https://doc.fastgpt.in/docs/intro/) +- [点击查看商业版](https://doc.fastgpt.in/docs/commercial/) \ No newline at end of file diff --git a/projects/app/src/components/ChatBox/MessageInput.tsx b/projects/app/src/components/ChatBox/MessageInput.tsx index c894c7edc..6a4195271 100644 --- a/projects/app/src/components/ChatBox/MessageInput.tsx +++ b/projects/app/src/components/ChatBox/MessageInput.tsx @@ -55,6 +55,8 @@ const MessageInput = ({ const { t } = useTranslation(); const havInput = !!inputValue || fileList.length > 0; + const hasFileUploading = fileList.some((item) => !item.url); + const canSendMessage = havInput && !hasFileUploading; /* file selector and upload */ const { File, onOpen: onOpenSelectFile } = useSelectFile({ @@ -142,7 +144,8 @@ const MessageInput = ({ ); /* on send */ - const handleSend = useCallback(async () => { + const handleSend = async () => { + if (!canSendMessage) return; const textareaValue = TextareaDom.current?.value || ''; onSendMessage({ @@ -150,7 +153,7 @@ const MessageInput = ({ files: fileList }); replaceFile([]); - }, [TextareaDom, fileList, onSendMessage, replaceFile]); + }; /* whisper init */ const { @@ -466,16 +469,20 @@ const MessageInput = ({ h={['28px', '32px']} w={['28px', '32px']} borderRadius={'md'} - bg={isSpeaking || isChatting ? '' : !havInput ? '#E5E5E5' : 'primary.500'} + bg={ + isSpeaking || isChatting + ? '' + : !havInput || hasFileUploading + ? '#E5E5E5' + : 'primary.500' + } cursor={havInput ? 'pointer' : 'not-allowed'} lineHeight={1} onClick={() => { if (isChatting) { return onStop(); } - if (havInput) { - return handleSend(); - } + return handleSend(); }} > {isChatting ? ( diff --git a/projects/app/src/components/Markdown/index.tsx b/projects/app/src/components/Markdown/index.tsx index a32867f9e..71516fbc5 100644 --- a/projects/app/src/components/Markdown/index.tsx +++ b/projects/app/src/components/Markdown/index.tsx @@ -53,7 +53,7 @@ const Markdown = ({ ); const formatSource = source - .replace(/\\n/g, '\n ') + // .replace(/\\n/g, '\n') .replace(/(http[s]?:\/\/[^\s,。]+)([。,])/g, '$1 $2') .replace(/\n*(\[QUOTE SIGN\]\(.*\))/g, '$1'); diff --git a/projects/app/src/pages/api/plugins/textEditor/v2/index.ts b/projects/app/src/pages/api/plugins/textEditor/v2/index.ts index 57af18e60..c01824ce3 100644 --- a/projects/app/src/pages/api/plugins/textEditor/v2/index.ts +++ b/projects/app/src/pages/api/plugins/textEditor/v2/index.ts @@ -40,3 +40,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< res.status(500).send(getErrText(err)); } } + +export const config = { + api: { + responseLimit: '16mb' + } +}; diff --git a/projects/app/src/pages/api/v1/chat/completions.ts b/projects/app/src/pages/api/v1/chat/completions.ts index 3ed81bd7c..72c57a970 100644 --- a/projects/app/src/pages/api/v1/chat/completions.ts +++ b/projects/app/src/pages/api/v1/chat/completions.ts @@ -362,12 +362,6 @@ async function handler(req: NextApiRequest, res: NextApiResponse) { } export default NextAPI(handler); -export const config = { - api: { - responseLimit: '20mb' - } -}; - const authShareChat = async ({ chatId, ...data @@ -526,3 +520,9 @@ const authHeaderRequest = async ({ canWrite }; }; + +export const config = { + api: { + responseLimit: '20mb' + } +}; diff --git a/projects/app/src/pages/dataset/detail/components/Import/diffSource/TableLocal.tsx b/projects/app/src/pages/dataset/detail/components/Import/diffSource/TableLocal.tsx index 27e884e96..b574cb396 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/diffSource/TableLocal.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/diffSource/TableLocal.tsx @@ -28,7 +28,7 @@ export default React.memo(FileLocal); const csvTemplate = `"第一列内容","第二列内容" "必填列","可选列。CSV 中请注意内容不能包含双引号,双引号是列分割符号" -"只会讲第一和第二列内容导入,其余列会被忽略","" +"只会将第一和第二列内容导入,其余列会被忽略","" "结合人工智能的演进历程,AIGC的发展大致可以分为三个阶段,即:早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期),以及快速发展展阶段(21世纪10年代中期至今)。","" "AIGC发展分为几个阶段?","早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期)、快速发展展阶段(21世纪10年代中期至今)"`; diff --git a/projects/app/src/web/core/dataset/store/searchTest.ts b/projects/app/src/web/core/dataset/store/searchTest.ts index 700bf1796..fd5444dc3 100644 --- a/projects/app/src/web/core/dataset/store/searchTest.ts +++ b/projects/app/src/web/core/dataset/store/searchTest.ts @@ -32,7 +32,7 @@ export const useSearchTestStore = create()( datasetTestList: [], pushDatasetTestItem(data) { set((state) => { - state.datasetTestList = [data, ...state.datasetTestList].slice(0, 100); + state.datasetTestList = [data, ...state.datasetTestList].slice(0, 50); }); }, delDatasetTestItemById(id) {