diff --git a/docSite/content/docs/development/configuration.md b/docSite/content/docs/development/configuration.md index 1f8a8acd9..cad3a226d 100644 --- a/docSite/content/docs/development/configuration.md +++ b/docSite/content/docs/development/configuration.md @@ -26,7 +26,7 @@ weight: 520 "qaMaxProcess": 15, // QA 生成最大进程,结合数据库性能和 key 来设置 "pgHNSWEfSearch": 100 // pg vector 索引参数,越大精度高但速度慢 }, - "ChatModels": [ + "ChatModels": [ // 对话模型 { "model": "gpt-3.5-turbo-1106", "name": "GPT35-1106", @@ -76,7 +76,7 @@ weight: 520 "defaultSystemChatPrompt": "" } ], - "QAModels": [ + "QAModels": [ // QA 生成模型 { "model": "gpt-3.5-turbo-16k", "name": "GPT35-16k", @@ -85,14 +85,14 @@ weight: 520 "price": 0 } ], - "CQModels": [ + "CQModels": [ // 问题分类模型 { "model": "gpt-3.5-turbo-1106", "name": "GPT35-1106", "maxContext": 16000, "maxResponse": 4000, "price": 0, - "functionCall": true, + "functionCall": true, // 是否支持function call, 不支持的模型需要设置为 false,会走提示词生成 "functionPrompt": "" }, { @@ -105,7 +105,7 @@ weight: 520 "functionPrompt": "" } ], - "ExtractModels": [ + "ExtractModels": [ // 内容提取模型 { "model": "gpt-3.5-turbo-1106", "name": "GPT35-1106", @@ -116,7 +116,7 @@ weight: 520 "functionPrompt": "" } ], - "QGModels": [ + "QGModels": [ // 生成下一步指引 { "model": "gpt-3.5-turbo-1106", "name": "GPT35-1106", @@ -125,7 +125,7 @@ weight: 520 "price": 0 } ], - "VectorModels": [ + "VectorModels": [ // 向量模型 { "model": "text-embedding-ada-002", "name": "Embedding-2", diff --git a/docSite/content/docs/installation/upgrading/462.md b/docSite/content/docs/installation/upgrading/462.md new file mode 100644 index 000000000..8f47cc80f --- /dev/null +++ b/docSite/content/docs/installation/upgrading/462.md @@ -0,0 +1,31 @@ +--- +title: 'V4.6.2(需要初始化)' +description: 'FastGPT V4.6.2' +icon: 'upgrade' +draft: false +toc: true +weight: 834 +--- + +## 1。执行初始化 API + +发起 1 个 HTTP 请求 ({{rootkey}} 替换成环境变量里的 `rootkey`,{{host}} 替换成自己域名) + +1. https://xxxxx/api/admin/initv462 + +```bash +curl --location --request POST 'https://{{host}}/api/admin/initv462' \ +--header 'rootkey: {{rootkey}}' \ +--header 'Content-Type: application/json' +``` + +初始化说明: +1. 初始化全文索引 + +## V4.6.2 功能介绍 + +1. 新增 - 全文索引(需配合 Rerank 模型,在看怎么放到开源版,模型接口比较特殊) +2. 新增 - 插件来源(预计4.7/4.8版本会正式使用) +3. 优化 - PDF读取 +4. 优化 - docx文件读取,转成 markdown 并保留其图片内容 +5. 修复和优化 TextSplitter 函数 diff --git a/packages/global/common/string/textSplitter.ts b/packages/global/common/string/textSplitter.ts index 6f0eadb7a..b3b3a1b4c 100644 --- a/packages/global/common/string/textSplitter.ts +++ b/packages/global/common/string/textSplitter.ts @@ -3,126 +3,184 @@ import { countPromptTokens } from './tiktoken'; /** * text split into chunks - * maxLen - one chunk len. max: 3500 + * chunkLen - one chunk len. 
max: 3500 * overlapLen - The size of the before and after Text - * maxLen > overlapLen + * chunkLen > overlapLen * markdown */ -export const splitText2Chunks = (props: { text: string; maxLen: number; overlapLen?: number }) => { - const { text = '', maxLen, overlapLen = Math.floor(maxLen * 0.2) } = props; - const tempMarker = 'SPLIT_HERE_SPLIT_HERE'; +export const splitText2Chunks = (props: { + text: string; + chunkLen: number; + overlapRatio?: number; +}): { + chunks: string[]; + tokens: number; +} => { + const { text = '', chunkLen, overlapRatio = 0.2 } = props; + const splitMarker = 'SPLIT_HERE_SPLIT_HERE'; + const overlapLen = Math.round(chunkLen * overlapRatio); - const stepReg: Record = { - 0: /^(#\s[^\n]+)\n/gm, - 1: /^(##\s[^\n]+)\n/gm, - 2: /^(###\s[^\n]+)\n/gm, - 3: /^(####\s[^\n]+)\n/gm, + // The larger maxLen is, the next sentence is less likely to trigger splitting + const stepReges: { reg: RegExp; maxLen: number }[] = [ + { reg: /^(#\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 }, + { reg: /^(##\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 }, + { reg: /^(###\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 }, + { reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 }, - 4: /(\n\n)/g, - 5: /([\n])/g, - 6: /([。]|(?!<[^a-zA-Z])\.\s)/g, - 7: /([!?]|!\s|\?\s)/g, - 8: /([;]|;\s)/g, - 9: /([,]|,\s)/g + { reg: /([\n]{2})/g, maxLen: chunkLen * 1.4 }, + { reg: /([\n](?![\*\-|>`0-9]))/g, maxLen: chunkLen * 1.8 }, // (?![\*\-|>`0-9]): markdown special char + { reg: /([\n])/g, maxLen: chunkLen * 1.4 }, + + { reg: /([。]|([a-zA-Z])\.\s)/g, maxLen: chunkLen * 1.4 }, + { reg: /([!]|!\s)/g, maxLen: chunkLen * 1.4 }, + { reg: /([?]|\?\s)/g, maxLen: chunkLen * 1.6 }, + { reg: /([;]|;\s)/g, maxLen: chunkLen * 1.8 }, + { reg: /([,]|,\s)/g, maxLen: chunkLen * 2 } + ]; + + const getSplitTexts = ({ text, step }: { text: string; step: number }) => { + if (step >= stepReges.length) { + return [text]; + } + const isMarkdownSplit = step <= 3; + const { reg } = stepReges[step]; + + const splitTexts = text + .replace(reg, isMarkdownSplit ? 
`${splitMarker}$1` : `$1${splitMarker}`) + .split(`${splitMarker}`) + .filter((part) => part.trim()); + return splitTexts; + }; + + const getOneTextOverlapText = ({ text, step }: { text: string; step: number }): string => { + const forbidOverlap = step <= 6; + const maxOverlapLen = chunkLen * 0.4; + + // step >= stepReges.length: Do not overlap incomplete sentences + if (forbidOverlap || overlapLen === 0 || step >= stepReges.length) return ''; + + const splitTexts = getSplitTexts({ text, step }); + let overlayText = ''; + + for (let i = splitTexts.length - 1; i >= 0; i--) { + const currentText = splitTexts[i]; + const newText = currentText + overlayText; + const newTextLen = newText.length; + + if (newTextLen > overlapLen) { + if (newTextLen > maxOverlapLen) { + const text = getOneTextOverlapText({ text: newText, step: step + 1 }); + return text || overlayText; + } + return newText; + } + + overlayText = newText; + } + return overlayText; }; const splitTextRecursively = ({ text = '', step, - lastChunk, - overlayChunk + lastText }: { text: string; step: number; - lastChunk: string; - overlayChunk: string; - }) => { - if (text.length <= maxLen) { + lastText: string; + }): string[] => { + // mini text + if (text.length <= chunkLen) { return [text]; } - const reg = stepReg[step]; - const isMarkdownSplit = step < 4; - if (!reg) { - // use slice-maxLen to split text + // oversize + if (step >= stepReges.length) { + if (text.length < chunkLen * 3) { + return [text]; + } + // use slice-chunkLen to split text const chunks: string[] = []; - let chunk = ''; - for (let i = 0; i < text.length; i += maxLen - overlapLen) { - chunk = text.slice(i, i + maxLen); - chunks.push(chunk); + for (let i = 0; i < text.length; i += chunkLen - overlapLen) { + chunks.push(text.slice(i, i + chunkLen)); } return chunks; } + const { maxLen } = stepReges[step]; + const minChunkLen = chunkLen * 0.7; + // split text by special char - const splitTexts = (() => { - if (!reg.test(text)) { - return [text]; - } - return text - .replace(reg, isMarkdownSplit ? `${tempMarker}$1` : `$1${tempMarker}`) - .split(`${tempMarker}`) - .filter((part) => part); - })(); + const splitTexts = getSplitTexts({ text, step }); - let chunks: string[] = []; + const chunks: string[] = []; for (let i = 0; i < splitTexts.length; i++) { - let text = splitTexts[i]; - let chunkToken = lastChunk.length; - const textToken = text.length; + const currentText = splitTexts[i]; + const currentTextLen = currentText.length; + const lastTextLen = lastText.length; + const newText = lastText + currentText; + const newTextLen = lastTextLen + currentTextLen; - // next chunk is too large / new chunk is too large(The current chunk must be smaller than maxLen) - if (textToken >= maxLen || chunkToken + textToken > maxLen * 1.4) { - // last chunk is too large, push it to chunks, not add to next chunk - if (chunkToken > maxLen * 0.7) { - chunks.push(lastChunk); - lastChunk = ''; - overlayChunk = ''; + // newText is too large(now, The lastText must be smaller than chunkLen) + if (newTextLen > maxLen) { + // lastText greater minChunkLen, direct push it to chunks, not add to next chunk. 
(large lastText) + if (lastTextLen > minChunkLen) { + chunks.push(lastText); + lastText = getOneTextOverlapText({ text: lastText, step }); // next chunk will start with overlayText + i--; + continue; } - // chunk is small, insert to next chunks + + // split new Text, split chunks must will greater 1 (small lastText) const innerChunks = splitTextRecursively({ - text, + text: newText, step: step + 1, - lastChunk, - overlayChunk + lastText: '' }); - if (innerChunks.length === 0) continue; - chunks = chunks.concat(innerChunks); - lastChunk = ''; - overlayChunk = ''; + const lastChunk = innerChunks[innerChunks.length - 1]; + // last chunk is too small, concat it to lastText + if (lastChunk.length < minChunkLen) { + chunks.push(...innerChunks.slice(0, -1)); + lastText = lastChunk; + } else { + chunks.push(...innerChunks); + // compute new overlapText + lastText = getOneTextOverlapText({ + text: lastChunk, + step + }); + } continue; } - // size less than maxLen, push text to last chunk - lastChunk += text; - chunkToken += textToken; // Definitely less than 1.4 * maxLen + // size less than chunkLen, push text to last chunk. now, text definitely less than maxLen + lastText = newText; - // size over lapLen, push it to next chunk - if ( - overlapLen !== 0 && - !isMarkdownSplit && - chunkToken >= maxLen - overlapLen && - textToken < overlapLen - ) { - overlayChunk += text; - } - if (chunkToken >= maxLen) { - chunks.push(lastChunk); - lastChunk = overlayChunk; - overlayChunk = ''; + // If the chunk size reaches, add a chunk + if (newTextLen >= chunkLen) { + chunks.push(lastText); + lastText = getOneTextOverlapText({ text: lastText, step }); } } /* If the last chunk is independent, it needs to be push chunks. */ - if (lastChunk && chunks[chunks.length - 1] && !chunks[chunks.length - 1].endsWith(lastChunk)) { - chunks.push(lastChunk); + if (lastText && chunks[chunks.length - 1] && !chunks[chunks.length - 1].endsWith(lastText)) { + if (lastText.length < chunkLen * 0.4) { + chunks[chunks.length - 1] = chunks[chunks.length - 1] + lastText; + } else { + chunks.push(lastText); + } } return chunks; }; try { - const chunks = splitTextRecursively({ text, step: 0, lastChunk: '', overlayChunk: '' }); + const chunks = splitTextRecursively({ + text, + step: 0, + lastText: '' + }); const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0); diff --git a/packages/service/common/response/index.ts b/packages/service/common/response/index.ts index 323b9764c..04a6b451d 100644 --- a/packages/service/common/response/index.ts +++ b/packages/service/common/response/index.ts @@ -102,13 +102,13 @@ export function responseWriteController({ readStream: any; }) { res.on('drain', () => { - readStream.resume(); + readStream?.resume?.(); }); return (text: string | Buffer) => { const writeResult = res.write(text); if (!writeResult) { - readStream?.pause(); + readStream?.pause?.(); } }; } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 71c250c23..ca89bb5c5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -219,7 +219,7 @@ importers: specifier: ^4.17.21 version: registry.npmmirror.com/lodash@4.17.21 mammoth: - specifier: ^1.5.1 + specifier: ^1.6.0 version: registry.npmmirror.com/mammoth@1.6.0 mermaid: specifier: ^10.2.3 diff --git a/projects/app/package.json b/projects/app/package.json index 58c4fab8f..01de1ee19 100644 --- a/projects/app/package.json +++ b/projects/app/package.json @@ -1,6 +1,6 @@ { "name": "app", - "version": "4.6.1", + "version": "4.6.2", "private": false, "scripts": { "dev": "next 
dev", @@ -38,7 +38,7 @@ "jsdom": "^22.1.0", "jsonwebtoken": "^9.0.2", "lodash": "^4.17.21", - "mammoth": "^1.5.1", + "mammoth": "^1.6.0", "mermaid": "^10.2.3", "multer": "1.4.5-lts.1", "nanoid": "^4.0.1", diff --git a/projects/app/public/docs/versionIntro.md b/projects/app/public/docs/versionIntro.md index 47e3c9d1f..b467ede7e 100644 --- a/projects/app/public/docs/versionIntro.md +++ b/projects/app/public/docs/versionIntro.md @@ -1,12 +1,13 @@ -### Fast GPT V4.6 +### Fast GPT V4.6.2 1. 新增 - 团队空间 2. 新增 - 多路向量(多个向量映射一组数据) 3. 新增 - tts语音 -4. 线上环境新增 - ReRank向量召回,提高召回精度 -5. 优化 - 知识库导出,可直接触发流下载,无需等待转圈圈 -6. [知识库结构详解](https://doc.fastgpt.in/docs/use-cases/datasetengine/) -7. [知识库提示词详解](https://doc.fastgpt.in/docs/use-cases/ai_settings/#引用模板--引用提示词) -8. [使用文档](https://doc.fastgpt.in/docs/intro/) -9. [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow) -10. [点击查看商业版](https://doc.fastgpt.in/docs/commercial/) +4. 新增 - 语音输入 +5. 新增 - 增强召回方式,提高召回精度 +6. 优化 - 知识库导出,可直接触发流下载,无需等待转圈圈 +7. [知识库结构详解](https://doc.fastgpt.in/docs/use-cases/datasetengine/) +8. [知识库提示词详解](https://doc.fastgpt.in/docs/use-cases/ai_settings/#引用模板--引用提示词) +9. [使用文档](https://doc.fastgpt.in/docs/intro/) +10. [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow) +11. [点击查看商业版](https://doc.fastgpt.in/docs/commercial/) diff --git a/projects/app/public/imgs/modal/key.svg b/projects/app/public/imgs/modal/key.svg index 96a8fe127..8b86419a3 100644 --- a/projects/app/public/imgs/modal/key.svg +++ b/projects/app/public/imgs/modal/key.svg @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/projects/app/public/locales/en/common.json b/projects/app/public/locales/en/common.json index c722e81b0..e62b94580 100644 --- a/projects/app/public/locales/en/common.json +++ b/projects/app/public/locales/en/common.json @@ -193,6 +193,9 @@ "unKnow": "There was an accident" }, "export": "", + "file": { + "Select file amount limit 100": "You can select a maximum of 100 files at a time" + }, "folder": { "Drag Tip": "Click and move", "Move Success": "Move Success", @@ -260,10 +263,22 @@ "Similarity": "Similarity", "data": { "Edit": "Edit Data", + "data is deleted": "Data is deleted", "id": "Data ID" }, + "import": { + "Ideal chunk length": "Ideal chunk length", + "Ideal chunk length Tips": "Segment by end symbol. We recommend that your document should be properly punctuated to ensure that each complete sentence length does not exceed this value \n Chinese document recommended 400~1000\n English document recommended 600~1200" + }, "test": { - "Test Result": "Results" + "Test": "Start", + "Test Result": "Results", + "Test Text": "Text", + "Test Text Placeholder": "Enter the text you want to test", + "delete test history": "Delete the test result", + "test history": "Test History", + "test result placeholder": "The test results will be presented here", + "test result tip": "The contents of the knowledge base are sorted according to their similarity to the test text, and you can adjust the corresponding text according to the test results. Note: The data in the test record may have been modified, clicking on a test data will show the latest data." 
} }, "module": { diff --git a/projects/app/public/locales/zh/common.json b/projects/app/public/locales/zh/common.json index 53a384816..a53fd8447 100644 --- a/projects/app/public/locales/zh/common.json +++ b/projects/app/public/locales/zh/common.json @@ -193,6 +193,9 @@ "unKnow": "出现了点意外~" }, "export": "", + "file": { + "Select file amount limit 100": "每次最多选择100个文件" + }, "folder": { "Drag Tip": "点我可拖动", "Move Success": "移动成功", @@ -260,10 +263,22 @@ "Similarity": "相似度", "data": { "Edit": "编辑数据", + "data is deleted": "该数据已被删除", "id": "数据ID" }, + "import": { + "Ideal chunk length": "理想分块长度", + "Ideal chunk length Tips": "按结束符号进行分段。我们建议您的文档应合理的使用标点符号,以确保每个完整的句子长度不要超过该值\n中文文档建议400~1000\n英文文档建议600~1200" + }, "test": { - "Test Result": "测试结果" + "Test": "测试", + "Test Result": "测试结果", + "Test Text": "测试文本", + "Test Text Placeholder": "输入需要测试的文本", + "delete test history": "删除该测试结果", + "test history": "测试历史", + "test result placeholder": "测试结果将在这里展示", + "test result tip": "根据知识库内容与测试文本的相似度进行排序,你可以根据测试结果调整对应的文本。\n注意:测试记录中的数据可能已经被修改过,点击某条测试数据后将展示最新的数据。" } }, "module": { diff --git a/projects/app/src/components/ChatBox/MessageInput.tsx b/projects/app/src/components/ChatBox/MessageInput.tsx index 0ec024f6f..d95466b33 100644 --- a/projects/app/src/components/ChatBox/MessageInput.tsx +++ b/projects/app/src/components/ChatBox/MessageInput.tsx @@ -8,7 +8,7 @@ import MyIcon from '../Icon'; import styles from './index.module.scss'; import { useRouter } from 'next/router'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; -import { compressImgAndUpload } from '@/web/common/file/controller'; +import { compressImgFileAndUpload } from '@/web/common/file/controller'; import { useToast } from '@/web/common/hooks/useToast'; import { customAlphabet } from 'nanoid'; import { IMG_BLOCK_KEY } from '@fastgpt/global/core/chat/constants'; @@ -72,7 +72,7 @@ const MessageInput = ({ const uploadFile = async (file: FileItemType) => { if (file.type === FileTypeEnum.image) { try { - const src = await compressImgAndUpload({ + const src = await compressImgFileAndUpload({ file: file.rawFile, maxW: 1000, maxH: 1000, diff --git a/projects/app/src/components/Markdown/img/Image.tsx b/projects/app/src/components/Markdown/img/Image.tsx index 7a5f4752e..405b6059f 100644 --- a/projects/app/src/components/Markdown/img/Image.tsx +++ b/projects/app/src/components/Markdown/img/Image.tsx @@ -1,5 +1,6 @@ import React, { useState } from 'react'; import { + Box, Image, Modal, ModalCloseButton, @@ -8,6 +9,7 @@ import { Skeleton, useDisclosure } from '@chakra-ui/react'; +import MyModal from '@/components/MyModal'; const MdImage = ({ src }: { src?: string }) => { const [isLoading, setIsLoading] = useState(true); @@ -43,17 +45,21 @@ const MdImage = ({ src }: { src?: string }) => { onOpen(); }} /> - + - - {''} + + + {''} + diff --git a/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx b/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx index 906b99ebd..1bd82767f 100644 --- a/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx +++ b/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx @@ -2,7 +2,7 @@ import React, { useCallback, useState } from 'react'; import { useForm } from 'react-hook-form'; import { useTranslation } from 'next-i18next'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; -import { compressImgAndUpload } from '@/web/common/file/controller'; +import { compressImgFileAndUpload } from 
'@/web/common/file/controller'; import { useToast } from '@/web/common/hooks/useToast'; import { getErrText } from '@fastgpt/global/common/error/utils'; import { useRequest } from '@/web/common/hooks/useRequest'; @@ -49,7 +49,7 @@ function EditModal({ const file = e[0]; if (!file) return; try { - const src = await compressImgAndUpload({ + const src = await compressImgFileAndUpload({ file, maxW: 100, maxH: 100 diff --git a/projects/app/src/global/core/prompt/AIChat.ts b/projects/app/src/global/core/prompt/AIChat.ts index 9de23385c..ebfbd3c1d 100644 --- a/projects/app/src/global/core/prompt/AIChat.ts +++ b/projects/app/src/global/core/prompt/AIChat.ts @@ -35,7 +35,7 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [ 1. 背景知识是最新的实时的信息,使用背景知识回答问题。 2. 优先使用背景知识的内容回答我的问题,答案应与背景知识严格一致。 3. 背景知识无法回答我的问题时,可以忽略背景知识,根据你的知识来自由回答。 -4. 使用对话的风格,自然的回答问题。 +4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。 我的问题是:"{{question}}"` }, { @@ -49,7 +49,7 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [ 1. 背景知识是最新的实时的信息,使用背景知识回答问题,其中 instruction 是相关介绍,output 是预期回答或补充。 2. 优先使用背景知识的内容回答我的问题,答案应与背景知识严格一致。 3. 背景知识无法回答我的问题时,可以忽略背景知识,根据你的知识来自由回答。 -4. 使用对话的风格,自然的回答问题。 +4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。 我的问题是:"{{question}}"` }, { @@ -63,7 +63,7 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [ 1. 背景知识是最新的实时的信息,是你的唯一信息来源,使用背景知识回答问题。 2. 优先使用背景知识回答我的问题,答案与背景知识完全一致,无需做其他回答。 3. 背景知识与问题无关,或背景知识无法回答本次问题时,则拒绝回答本次问题:“我不太清除xxx”。 -4. 使用对话的风格,自然的回答问题。 +4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。 我的问题是:"{{question}}"` }, { diff --git a/projects/app/src/global/core/prompt/agent.ts b/projects/app/src/global/core/prompt/agent.ts index d21793ca4..a6123d9b5 100644 --- a/projects/app/src/global/core/prompt/agent.ts +++ b/projects/app/src/global/core/prompt/agent.ts @@ -1,18 +1,17 @@ export const Prompt_AgentQA = { - prompt: `我会给你一段文本,{{theme}},学习它们,并整理学习成果,要求为: + description: `我会给你一段文本,学习它们,并整理学习成果,要求为: 1. 提出问题并给出每个问题的答案。 2. 每个答案都要详细完整,给出相关原文描述,答案可以包含普通文字、链接、代码、表格、公示、媒体链接等 markdown 元素。 3. 最多提出 30 个问题。 -4. 
按格式返回多个问题和答案: - +`, + fixedText: `最后,你需要按下面的格式返回多个问题和答案: Q1: 问题。 A1: 答案。 Q2: A2: …… -我的文本:"""{{text}}"""`, - defaultTheme: '它们可能包含多个主题内容' +我的文本:"""{{text}}"""` }; export const Prompt_ExtractJson = `你可以从 "对话记录" 中提取指定信息,并返回一个 JSON 对象,JSON 对象要求: diff --git a/projects/app/src/pages/account/components/Info.tsx b/projects/app/src/pages/account/components/Info.tsx index 530014d67..47cab4af6 100644 --- a/projects/app/src/pages/account/components/Info.tsx +++ b/projects/app/src/pages/account/components/Info.tsx @@ -17,7 +17,7 @@ import type { UserType } from '@fastgpt/global/support/user/type.d'; import { useQuery } from '@tanstack/react-query'; import dynamic from 'next/dynamic'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; -import { compressImgAndUpload } from '@/web/common/file/controller'; +import { compressImgFileAndUpload } from '@/web/common/file/controller'; import { feConfigs, systemVersion } from '@/web/common/system/staticData'; import { useTranslation } from 'next-i18next'; import { timezoneList } from '@fastgpt/global/common/time/timezone'; @@ -94,7 +94,7 @@ const UserInfo = () => { const file = e[0]; if (!file || !userInfo) return; try { - const src = await compressImgAndUpload({ + const src = await compressImgFileAndUpload({ file, maxW: 100, maxH: 100 diff --git a/projects/app/src/pages/app/detail/components/InfoModal.tsx b/projects/app/src/pages/app/detail/components/InfoModal.tsx index 11b3da073..913b365fb 100644 --- a/projects/app/src/pages/app/detail/components/InfoModal.tsx +++ b/projects/app/src/pages/app/detail/components/InfoModal.tsx @@ -14,7 +14,7 @@ import { useForm } from 'react-hook-form'; import { AppSchema } from '@fastgpt/global/core/app/type.d'; import { useToast } from '@/web/common/hooks/useToast'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; -import { compressImgAndUpload } from '@/web/common/file/controller'; +import { compressImgFileAndUpload } from '@/web/common/file/controller'; import { getErrText } from '@fastgpt/global/common/error/utils'; import { useRequest } from '@/web/common/hooks/useRequest'; import Avatar from '@/components/Avatar'; @@ -101,7 +101,7 @@ const InfoModal = ({ const file = e[0]; if (!file) return; try { - const src = await compressImgAndUpload({ + const src = await compressImgFileAndUpload({ file, maxW: 100, maxH: 100 diff --git a/projects/app/src/pages/app/list/component/CreateModal.tsx b/projects/app/src/pages/app/list/component/CreateModal.tsx index 80bb70a80..684c43114 100644 --- a/projects/app/src/pages/app/list/component/CreateModal.tsx +++ b/projects/app/src/pages/app/list/component/CreateModal.tsx @@ -13,7 +13,7 @@ import { } from '@chakra-ui/react'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; import { useForm } from 'react-hook-form'; -import { compressImgAndUpload } from '@/web/common/file/controller'; +import { compressImgFileAndUpload } from '@/web/common/file/controller'; import { getErrText } from '@fastgpt/global/common/error/utils'; import { useToast } from '@/web/common/hooks/useToast'; import { postCreateApp } from '@/web/core/app/api'; @@ -58,7 +58,7 @@ const CreateModal = ({ onClose, onSuccess }: { onClose: () => void; onSuccess: ( const file = e[0]; if (!file) return; try { - const src = await compressImgAndUpload({ + const src = await compressImgFileAndUpload({ file, maxW: 100, maxH: 100 diff --git a/projects/app/src/pages/dataset/detail/components/Import/Chunk.tsx b/projects/app/src/pages/dataset/detail/components/Import/Chunk.tsx index 
ea0f707d4..b43535b33 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/Chunk.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/Chunk.tsx @@ -16,10 +16,12 @@ import { QuestionOutlineIcon } from '@chakra-ui/icons'; import { useDatasetStore } from '@/web/core/dataset/store/dataset'; import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider'; +import { useTranslation } from 'next-i18next'; -const fileExtension = '.txt, .doc, .docx, .pdf, .md'; +const fileExtension = '.txt, .docx, .pdf, .md'; const ChunkImport = () => { + const { t } = useTranslation(); const { datasetDetail } = useDatasetStore(); const vectorModel = datasetDetail.vectorModel; const unitPrice = vectorModel?.price || 0.2; @@ -48,13 +50,8 @@ const ChunkImport = () => { {/* chunk size */} - 段落长度 - + {t('core.dataset.import.Ideal chunk length')} + diff --git a/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx b/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx index 11f938404..d45001efe 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx @@ -48,6 +48,7 @@ export interface Props extends BoxProps { onPushFiles: (files: FileItemType[]) => void; tipText?: string; chunkLen?: number; + overlapRatio?: number; fileTemplate?: { type: string; filename: string; @@ -63,6 +64,7 @@ const FileSelect = ({ onPushFiles, tipText, chunkLen = 500, + overlapRatio, fileTemplate, showUrlFetch = true, showCreateFile = true, @@ -97,6 +99,13 @@ const FileSelect = ({ // select file const onSelectFile = useCallback( async (files: File[]) => { + if (files.length >= 100) { + return toast({ + status: 'warning', + title: t('common.file.Select file amount limit 100') + }); + } + try { for await (let file of files) { const extension = file?.name?.split('.')?.pop()?.toLowerCase(); @@ -165,7 +174,6 @@ const FileSelect = ({ return readTxtContent(file); case 'pdf': return readPdfContent(file); - case 'doc': case 'docx': return readDocContent(file); } @@ -176,7 +184,8 @@ const FileSelect = ({ text = simpleText(text); const splitRes = splitText2Chunks({ text, - maxLen: chunkLen + chunkLen, + overlapRatio }); const fileItem: FileItemType = { @@ -206,7 +215,7 @@ const FileSelect = ({ } setSelectingText(undefined); }, - [chunkLen, datasetDetail._id, onPushFiles, t, toast] + [chunkLen, datasetDetail._id, onPushFiles, overlapRatio, t, toast] ); // link fetch const onUrlFetch = useCallback( @@ -214,7 +223,8 @@ const FileSelect = ({ const result: FileItemType[] = e.map(({ url, content }) => { const splitRes = splitText2Chunks({ text: content, - maxLen: chunkLen + chunkLen, + overlapRatio }); return { id: nanoid(), @@ -234,7 +244,7 @@ const FileSelect = ({ }); onPushFiles(result); }, - [chunkLen, onPushFiles] + [chunkLen, onPushFiles, overlapRatio] ); // manual create file and copy data const onCreateFile = useCallback( @@ -255,7 +265,8 @@ const FileSelect = ({ const splitRes = splitText2Chunks({ text: content, - maxLen: chunkLen + chunkLen, + overlapRatio }); onPushFiles([ @@ -276,7 +287,7 @@ const FileSelect = ({ } ]); }, - [chunkLen, datasetDetail._id, onPushFiles] + [chunkLen, datasetDetail._id, onPushFiles, overlapRatio] ); const handleDragEnter = (e: DragEvent) => { diff --git a/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx b/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx index d39da93b5..b12481210 100644 --- 
a/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx @@ -41,16 +41,19 @@ const ImportData = ({ const map = { [ImportTypeEnum.chunk]: { defaultChunkLen: vectorModel?.defaultToken || 500, + chunkOverlapRatio: 0.2, unitPrice: vectorModel?.price || 0.2, mode: TrainingModeEnum.chunk }, [ImportTypeEnum.qa]: { - defaultChunkLen: agentModel?.maxContext * 0.6 || 9000, + defaultChunkLen: agentModel?.maxContext * 0.6 || 8000, + chunkOverlapRatio: 0, unitPrice: agentModel?.price || 3, mode: TrainingModeEnum.qa }, [ImportTypeEnum.csv]: { defaultChunkLen: vectorModel?.defaultToken || 500, + chunkOverlapRatio: 0, unitPrice: vectorModel?.price || 0.2, mode: TrainingModeEnum.chunk } diff --git a/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx b/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx index 9d4d56c86..5ed496a91 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx @@ -44,6 +44,7 @@ type useImportStoreType = { price: number; uploading: boolean; chunkLen: number; + chunkOverlapRatio: number; setChunkLen: Dispatch; showRePreview: boolean; setReShowRePreview: Dispatch>; @@ -66,6 +67,7 @@ const StateContext = createContext({ }, price: 0, chunkLen: 0, + chunkOverlapRatio: 0, setChunkLen: function (value: number): void { throw new Error('Function not implemented.'); }, @@ -93,6 +95,7 @@ const Provider = ({ vectorModel, agentModel, defaultChunkLen = 500, + chunkOverlapRatio = 0.2, importType, onUploadSuccess, children @@ -104,6 +107,7 @@ const Provider = ({ vectorModel: string; agentModel: string; defaultChunkLen: number; + chunkOverlapRatio: number; importType: `${ImportTypeEnum}`; onUploadSuccess: () => void; children: React.ReactNode; @@ -180,7 +184,8 @@ const Provider = ({ state.map((file) => { const splitRes = splitText2Chunks({ text: file.text, - maxLen: chunkLen + chunkLen, + overlapRatio: chunkOverlapRatio }); return { @@ -228,6 +233,7 @@ const Provider = ({ onclickUpload, uploading, chunkLen, + chunkOverlapRatio, setChunkLen, showRePreview, setReShowRePreview @@ -413,7 +419,8 @@ export const SelectorContainer = ({ tip?: string; children: React.ReactNode; }) => { - const { files, setPreviewFile, isUnselectedFile, setFiles, chunkLen } = useImportStore(); + const { files, setPreviewFile, isUnselectedFile, setFiles, chunkLen, chunkOverlapRatio } = + useImportStore(); return ( files.concat(state)); }} chunkLen={chunkLen} + overlapRatio={chunkOverlapRatio} showUrlFetch={showUrlFetch} showCreateFile={showCreateFile} fileTemplate={fileTemplate} diff --git a/projects/app/src/pages/dataset/detail/components/Import/QA.tsx b/projects/app/src/pages/dataset/detail/components/Import/QA.tsx index d6bc531ad..23a504ed9 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/QA.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/QA.tsx @@ -1,15 +1,14 @@ -import React, { useState, useMemo } from 'react'; -import { Box, Flex, Button, Input } from '@chakra-ui/react'; +import React, { useState } from 'react'; +import { Box, Flex, Button, Textarea } from '@chakra-ui/react'; import { useConfirm } from '@/web/common/hooks/useConfirm'; import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools'; import MyTooltip from '@/components/MyTooltip'; -import { QuestionOutlineIcon, InfoOutlineIcon } from '@chakra-ui/icons'; +import { QuestionOutlineIcon } 
from '@chakra-ui/icons'; import { Prompt_AgentQA } from '@/global/core/prompt/agent'; -import { replaceVariable } from '@fastgpt/global/common/string/tools'; import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider'; import { useDatasetStore } from '@/web/core/dataset/store/dataset'; -const fileExtension = '.txt, .doc, .docx, .pdf, .md'; +const fileExtension = '.txt, .docx, .pdf, .md'; const QAImport = () => { const { datasetDetail } = useDatasetStore(); @@ -31,36 +30,27 @@ const QAImport = () => { content: `该任务无法终止!导入后会自动调用大模型生成问答对,会有一些细节丢失,请确认!如果余额不足,未完成的任务会被暂停。` }); - const [prompt, setPrompt] = useState(''); - - const previewQAPrompt = useMemo(() => { - return replaceVariable(Prompt_AgentQA.prompt, { - theme: prompt || Prompt_AgentQA.defaultTheme - }); - }, [prompt]); + const [prompt, setPrompt] = useState(Prompt_AgentQA.description); return ( {/* prompt */} - - - QA 拆分引导词{' '} - - - + + + QA 拆分引导词 - - 文件主题 - +
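Illustrative usage of the reworked text splitter in this patch: `splitText2Chunks` (packages/global/common/string/textSplitter.ts) now takes `chunkLen` plus an optional `overlapRatio` in place of `maxLen`/`overlapLen`, and returns `{ chunks, tokens }`. The sketch below is a minimal example only; the `@fastgpt/global/...` import path and the sample values are assumptions for illustration, not taken from the diff.

```ts
// Minimal sketch of the new splitText2Chunks signature (import path assumed).
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';

const text = '# Title\n\nFirst paragraph of a markdown document.\n\nSecond paragraph with more detail.';

const { chunks, tokens } = splitText2Chunks({
  text,
  chunkLen: 500,     // ideal chunk length in characters (replaces the old `maxLen`)
  overlapRatio: 0.2  // overlap length = chunkLen * overlapRatio; defaults to 0.2 when omitted
});

console.log(chunks.length, tokens);
```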