From 1fcdd7cb8d763d42017e790e4d4fe091860f81a1 Mon Sep 17 00:00:00 2001 From: Archer <545436317@qq.com> Date: Sat, 19 Aug 2023 12:54:24 +0800 Subject: [PATCH] feat: url fetch and create file (#199) * docs * docs * feat: url fetch and create file --- README.md | 1 + client/public/imgs/files/url.svg | 1 + client/public/locales/en/common.json | 7 +- client/public/locales/zh/common.json | 7 +- client/src/api/plugins/common.ts | 6 + client/src/constants/flow/ModuleTemplate.ts | 2 +- client/src/hooks/useSelectFile.tsx | 12 +- client/src/pages/api/plugins/urlFetch.ts | 12 +- .../pages/kb/detail/components/DataCard.tsx | 2 +- .../kb/detail/components/Import/Chunk.tsx | 174 ++++------ .../components/Import/CreateFileModal.tsx | 63 ++++ .../pages/kb/detail/components/Import/Csv.tsx | 87 ++--- .../detail/components/Import/FileSelect.tsx | 312 +++++++++++++++--- .../pages/kb/detail/components/Import/QA.tsx | 166 ++++------ .../components/Import/UrlFetchModal.tsx | 67 ++++ client/src/types/plugin.d.ts | 5 + client/src/utils/file.ts | 10 +- docSite/docs/develop/deploy/docker.md | 4 +- docSite/docs/develop/update/41init.md | 2 +- .../current/develop/update/41init.md | 2 +- 20 files changed, 583 insertions(+), 359 deletions(-) create mode 100644 client/public/imgs/files/url.svg create mode 100644 client/src/api/plugins/common.ts create mode 100644 client/src/pages/kb/detail/components/Import/CreateFileModal.tsx create mode 100644 client/src/pages/kb/detail/components/Import/UrlFetchModal.tsx diff --git a/README.md b/README.md index 5a0d7197c..be08746f0 100644 --- a/README.md +++ b/README.md @@ -119,3 +119,4 @@ FastGPT 是一个基于 LLM 大语言模型的知识库问答系统,提供开 1. 允许作为后台服务直接商用,但不允许直接使用 saas 服务商用。 2. 需保留相关版权信息。 3. 完整请查看 [FstGPT Open Source License](./LICENSE) +4. 联系方式:yujinlong@sealos.io, [点击查看定价策略](https://fael3z0zfze.feishu.cn/docx/F155dbirfo8vDDx2WgWc6extnwf) diff --git a/client/public/imgs/files/url.svg b/client/public/imgs/files/url.svg new file mode 100644 index 000000000..3a526a91e --- /dev/null +++ b/client/public/imgs/files/url.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/client/public/locales/en/common.json b/client/public/locales/en/common.json index 45a4449ea..b83cef9b2 100644 --- a/client/public/locales/en/common.json +++ b/client/public/locales/en/common.json @@ -54,9 +54,14 @@ }, "file": { "Click to download CSV template": "Click to download CSV template", - "Drag and drop": "Drag and drop files here, or click", + "Create File": "Create File", + "Create file": "Create file", + "Drag and drop": "Drag and drop files here", + "Fetch Url": "Fetch Url", "If the imported file is garbled, please convert CSV to UTF-8 encoding format": "If the imported file is garbled, please convert CSV to UTF-8 encoding format", "Release the mouse to upload the file": "Release the mouse to upload the file", + "Select a maximum of 10 files": "Select a maximum of 10 files", + "max 10": "Max 10 files", "select a document": "select a document", "support": "support {{fileExtension}} file", "upload error description": "Only upload multiple files or one folder at a time" diff --git a/client/public/locales/zh/common.json b/client/public/locales/zh/common.json index ed27797b9..9ad05aacd 100644 --- a/client/public/locales/zh/common.json +++ b/client/public/locales/zh/common.json @@ -54,9 +54,14 @@ }, "file": { "Click to download CSV template": "点击下载 CSV 模板", - "Drag and drop": "拖拽文件至此,或点击", + "Create File": "创建新文件", + "Create file": "创建文件", + "Drag and drop": "拖拽文件至此", + "Fetch Url": "链接读取", "If the imported file is garbled, please convert CSV to UTF-8 encoding format": "如果导入文件乱码,请将 CSV 转成 UTF-8 编码格式", "Release the mouse to upload the file": "松开鼠标上传文件", + "Select a maximum of 10 files": "最多选择10个文件", + "max 10": "最多选择 10 个文件", "select a document": "选择文件", "support": "支持 {{fileExtension}} 文件", "upload error description": "单次只支持上传多个文件或者一个文件夹" diff --git a/client/src/api/plugins/common.ts b/client/src/api/plugins/common.ts new file mode 100644 index 000000000..598cfbfa0 --- /dev/null +++ b/client/src/api/plugins/common.ts @@ -0,0 +1,6 @@ +import { GET, POST, PUT, DELETE } from '../request'; + +import type { FetchResultItem } from '@/types/plugin'; + +export const fetchUrls = (urlList: string[]) => + POST(`/plugins/urlFetch`, { urlList }); diff --git a/client/src/constants/flow/ModuleTemplate.ts b/client/src/constants/flow/ModuleTemplate.ts index 874ab87ea..207393272 100644 --- a/client/src/constants/flow/ModuleTemplate.ts +++ b/client/src/constants/flow/ModuleTemplate.ts @@ -182,7 +182,7 @@ export const ChatModule: FlowModuleTemplateType = { { key: TaskResponseKeyEnum.answerText, label: '模型回复', - description: '如果外接了内容,会在回复结束时自动添加\n\n', + description: '将在 stream 回复完毕后触发', valueType: FlowValueTypeEnum.string, type: FlowOutputItemTypeEnum.source, targets: [] diff --git a/client/src/hooks/useSelectFile.tsx b/client/src/hooks/useSelectFile.tsx index d7c4d7541..9570cc642 100644 --- a/client/src/hooks/useSelectFile.tsx +++ b/client/src/hooks/useSelectFile.tsx @@ -1,8 +1,12 @@ import React, { useRef, useCallback } from 'react'; import { Box } from '@chakra-ui/react'; +import { useToast } from './useToast'; +import { useTranslation } from 'react-i18next'; export const useSelectFile = (props?: { fileType?: string; multiple?: boolean }) => { + const { t } = useTranslation(); const { fileType = '*', multiple = false } = props || {}; + const { toast } = useToast(); const SelectFileDom = useRef(null); const File = useCallback( @@ -15,12 +19,18 @@ export const useSelectFile = (props?: { fileType?: string; multiple?: boolean }) multiple={multiple} onChange={(e) => { if (!e.target.files || e.target.files?.length === 0) return; + if (e.target.files.length > 10) { + return toast({ + status: 'warning', + title: t('file.Select a maximum of 10 files') + }); + } onSelect(Array.from(e.target.files)); }} /> ), - [fileType, multiple] + [fileType, multiple, t, toast] ); const onOpen = useCallback(() => { diff --git a/client/src/pages/api/plugins/urlFetch.ts b/client/src/pages/api/plugins/urlFetch.ts index e5d8d81dd..1ff3df6a4 100644 --- a/client/src/pages/api/plugins/urlFetch.ts +++ b/client/src/pages/api/plugins/urlFetch.ts @@ -5,12 +5,9 @@ import { JSDOM } from 'jsdom'; import { Readability } from '@mozilla/readability'; import { jsonRes } from '@/service/response'; import { authUser } from '@/service/utils/auth'; +import type { FetchResultItem } from '@/types/plugin'; +import { simpleText } from '@/utils/file'; -type FetchResultItem = { - url: string; - title: string; - content: string; -}; export type UrlFetchResponse = FetchResultItem[]; const fetchContent = async (req: NextApiRequest, res: NextApiResponse) => { @@ -38,10 +35,11 @@ const fetchContent = async (req: NextApiRequest, res: NextApiResponse) => { const reader = new Readability(dom.window.document); const article = reader.parse(); + const content = article?.textContent || ''; + return { url, - title: article?.title || '', - content: article?.textContent || '' + content: simpleText(`${article?.title}\n${content}`) }; }) ) diff --git a/client/src/pages/kb/detail/components/DataCard.tsx b/client/src/pages/kb/detail/components/DataCard.tsx index 4ef1c26ad..d489affb7 100644 --- a/client/src/pages/kb/detail/components/DataCard.tsx +++ b/client/src/pages/kb/detail/components/DataCard.tsx @@ -217,7 +217,7 @@ const DataCard = ({ kbId }: { kbId: string }) => { {item.a} - + {item.source?.trim()} { - const model = vectorModelList[0]?.model; + const model = vectorModelList[0]?.model || 'text-embedding-ada-002'; const unitPrice = vectorModelList[0]?.price || 0.2; const theme = useTheme(); const router = useRouter(); @@ -52,7 +39,6 @@ const ChunkImport = ({ kbId }: { kbId: string }) => { const [chunkLen, setChunkLen] = useState(500); const [showRePreview, setShowRePreview] = useState(false); - const [selecting, setSelecting] = useState(false); const [files, setFiles] = useState([]); const [previewFile, setPreviewFile] = useState(); const [successChunks, setSuccessChunks] = useState(0); @@ -72,73 +58,9 @@ const ChunkImport = ({ kbId }: { kbId: string }) => { content: `该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。` }); - const onSelectFile = useCallback( - async (files: File[]) => { - setSelecting(true); - try { - let promise = Promise.resolve(); - files.forEach((file) => { - promise = promise.then(async () => { - const extension = file?.name?.split('.')?.pop()?.toLowerCase(); - const icon = fileImgs.find((item) => new RegExp(item.reg).test(file.name))?.src; - const text = await (async () => { - switch (extension) { - case 'txt': - case 'md': - return readTxtContent(file); - case 'pdf': - return readPdfContent(file); - case 'doc': - case 'docx': - return readDocContent(file); - } - return ''; - })(); - - if (icon && text) { - const splitRes = splitText2Chunks({ - text: text, - maxLen: chunkLen - }); - - setFiles((state) => [ - { - id: nanoid(), - filename: file.name, - text, - icon, - ...splitRes - }, - ...state - ]); - } - }); - }); - await promise; - } catch (error: any) { - console.log(error); - toast({ - title: typeof error === 'string' ? error : '解析文件失败', - status: 'error' - }); - } - setSelecting(false); - }, - [chunkLen, toast] - ); - const { mutate: onclickUpload, isLoading: uploading } = useMutation({ mutationFn: async () => { - const chunks: { a: string; q: string; source: string }[] = []; - files.forEach((file) => - file.chunks.forEach((chunk) => { - chunks.push({ - q: chunk, - a: '', - source: file.filename - }); - }) - ); + const chunks = files.map((file) => file.chunks).flat(); // subsection import let success = 0; @@ -177,18 +99,22 @@ const ChunkImport = ({ kbId }: { kbId: string }) => { const onRePreview = useCallback(async () => { try { - const splitRes = files.map((item) => - splitText2Chunks({ - text: item.text, - maxLen: chunkLen - }) - ); - setFiles((state) => - state.map((file, index) => ({ - ...file, - ...splitRes[index] - })) + state.map((file) => { + const splitRes = splitText2Chunks({ + text: file.text, + maxLen: chunkLen + }); + return { + ...file, + tokens: splitRes.tokens, + chunks: splitRes.chunks.map((chunk) => ({ + q: chunk, + a: '', + source: file.filename + })) + }; + }) ); setPreviewFile(undefined); setShowRePreview(false); @@ -198,7 +124,12 @@ const ChunkImport = ({ kbId }: { kbId: string }) => { title: getErrText(error, '文本分段异常') }); } - }, [chunkLen, files, toast]); + }, [chunkLen, toast]); + + const filenameStyles = { + className: 'textEllipsis', + maxW: '400px' + }; return ( @@ -212,8 +143,10 @@ const ChunkImport = ({ kbId }: { kbId: string }) => { > { + setFiles((state) => files.concat(state)); + }} + chunkLen={chunkLen} py={emptyFiles ? '100px' : 5} /> @@ -241,7 +174,7 @@ const ChunkImport = ({ kbId }: { kbId: string }) => { onClick={() => setPreviewFile(item)} > {''} - + {item.filename} { pt={[4, 8]} bg={'myWhite.400'} > - + {previewFile.filename} { ) : ( - - 分段预览({totalChunk}组) - + + + 分段预览({totalChunk}组) + + {totalChunk > 100 && ( + + 仅展示部分 + + )} + {files.map((file) => - file.chunks.map((item, i) => ( + file.chunks.slice(0, 50).map((chunk, i) => ( { _hover={{ ...hoverDeleteStyles }} > - + # {i + 1} + + {file.filename} + { @@ -417,11 +366,12 @@ const ChunkImport = ({ kbId }: { kbId: string }) => { whiteSpace={'pre-wrap'} wordBreak={'break-all'} contentEditable - dangerouslySetInnerHTML={{ __html: item }} + dangerouslySetInnerHTML={{ __html: chunk.q }} onBlur={(e) => { // @ts-ignore const val = e.target.innerText; + /* delete file */ if (val === '') { setFiles((state) => state.map((stateFile) => @@ -437,14 +387,16 @@ const ChunkImport = ({ kbId }: { kbId: string }) => { ) ); } else { - setFiles((state) => - state.map((stateFile) => - stateFile.id === file.id + // update file + setFiles((stateFiles) => + stateFiles.map((stateFile) => + file.id === stateFile.id ? { - ...file, - chunks: file.chunks.map((chunk, index) => - i === index ? val : chunk - ) + ...stateFile, + chunks: stateFile.chunks.map((chunk, index) => ({ + ...chunk, + q: i === index ? val : chunk.q + })) } : stateFile ) diff --git a/client/src/pages/kb/detail/components/Import/CreateFileModal.tsx b/client/src/pages/kb/detail/components/Import/CreateFileModal.tsx new file mode 100644 index 000000000..213d18cbb --- /dev/null +++ b/client/src/pages/kb/detail/components/Import/CreateFileModal.tsx @@ -0,0 +1,63 @@ +import React from 'react'; +import { useTranslation } from 'next-i18next'; +import MyModal from '@/components/MyModal'; +import { Box, Input, Textarea, ModalBody, ModalFooter, Button } from '@chakra-ui/react'; +import { useForm } from 'react-hook-form'; + +const CreateFileModal = ({ + onClose, + onSuccess +}: { + onClose: () => void; + onSuccess: (e: { filename: string; content: string }) => void; +}) => { + const { t } = useTranslation(); + const { register, handleSubmit } = useForm({ + defaultValues: { + filename: '', + content: '' + } + }); + + return ( + + + + 文件名 + + + + 文件内容 + +