diff --git a/README.md b/README.md
index 5a0d7197c..be08746f0 100644
--- a/README.md
+++ b/README.md
@@ -119,3 +119,4 @@ FastGPT 是一个基于 LLM 大语言模型的知识库问答系统,提供开
1. 允许作为后台服务直接商用,但不允许直接使用 saas 服务商用。
2. 需保留相关版权信息。
3. 完整请查看 [FstGPT Open Source License](./LICENSE)
+4. 联系方式:yujinlong@sealos.io, [点击查看定价策略](https://fael3z0zfze.feishu.cn/docx/F155dbirfo8vDDx2WgWc6extnwf)
diff --git a/client/public/imgs/files/url.svg b/client/public/imgs/files/url.svg
new file mode 100644
index 000000000..3a526a91e
--- /dev/null
+++ b/client/public/imgs/files/url.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/client/public/locales/en/common.json b/client/public/locales/en/common.json
index 45a4449ea..b83cef9b2 100644
--- a/client/public/locales/en/common.json
+++ b/client/public/locales/en/common.json
@@ -54,9 +54,14 @@
},
"file": {
"Click to download CSV template": "Click to download CSV template",
- "Drag and drop": "Drag and drop files here, or click",
+ "Create File": "Create File",
+ "Create file": "Create file",
+ "Drag and drop": "Drag and drop files here",
+ "Fetch Url": "Fetch Url",
"If the imported file is garbled, please convert CSV to UTF-8 encoding format": "If the imported file is garbled, please convert CSV to UTF-8 encoding format",
"Release the mouse to upload the file": "Release the mouse to upload the file",
+ "Select a maximum of 10 files": "Select a maximum of 10 files",
+ "max 10": "Max 10 files",
"select a document": "select a document",
"support": "support {{fileExtension}} file",
"upload error description": "Only upload multiple files or one folder at a time"
diff --git a/client/public/locales/zh/common.json b/client/public/locales/zh/common.json
index ed27797b9..9ad05aacd 100644
--- a/client/public/locales/zh/common.json
+++ b/client/public/locales/zh/common.json
@@ -54,9 +54,14 @@
},
"file": {
"Click to download CSV template": "点击下载 CSV 模板",
- "Drag and drop": "拖拽文件至此,或点击",
+ "Create File": "创建新文件",
+ "Create file": "创建文件",
+ "Drag and drop": "拖拽文件至此",
+ "Fetch Url": "链接读取",
"If the imported file is garbled, please convert CSV to UTF-8 encoding format": "如果导入文件乱码,请将 CSV 转成 UTF-8 编码格式",
"Release the mouse to upload the file": "松开鼠标上传文件",
+ "Select a maximum of 10 files": "最多选择10个文件",
+ "max 10": "最多选择 10 个文件",
"select a document": "选择文件",
"support": "支持 {{fileExtension}} 文件",
"upload error description": "单次只支持上传多个文件或者一个文件夹"
diff --git a/client/src/api/plugins/common.ts b/client/src/api/plugins/common.ts
new file mode 100644
index 000000000..598cfbfa0
--- /dev/null
+++ b/client/src/api/plugins/common.ts
@@ -0,0 +1,6 @@
+import { GET, POST, PUT, DELETE } from '../request';
+
+import type { FetchResultItem } from '@/types/plugin';
+
+export const fetchUrls = (urlList: string[]) =>
+ POST(`/plugins/urlFetch`, { urlList });
diff --git a/client/src/constants/flow/ModuleTemplate.ts b/client/src/constants/flow/ModuleTemplate.ts
index 874ab87ea..207393272 100644
--- a/client/src/constants/flow/ModuleTemplate.ts
+++ b/client/src/constants/flow/ModuleTemplate.ts
@@ -182,7 +182,7 @@ export const ChatModule: FlowModuleTemplateType = {
{
key: TaskResponseKeyEnum.answerText,
label: '模型回复',
- description: '如果外接了内容,会在回复结束时自动添加\n\n',
+ description: '将在 stream 回复完毕后触发',
valueType: FlowValueTypeEnum.string,
type: FlowOutputItemTypeEnum.source,
targets: []
diff --git a/client/src/hooks/useSelectFile.tsx b/client/src/hooks/useSelectFile.tsx
index d7c4d7541..9570cc642 100644
--- a/client/src/hooks/useSelectFile.tsx
+++ b/client/src/hooks/useSelectFile.tsx
@@ -1,8 +1,12 @@
import React, { useRef, useCallback } from 'react';
import { Box } from '@chakra-ui/react';
+import { useToast } from './useToast';
+import { useTranslation } from 'react-i18next';
export const useSelectFile = (props?: { fileType?: string; multiple?: boolean }) => {
+ const { t } = useTranslation();
const { fileType = '*', multiple = false } = props || {};
+ const { toast } = useToast();
const SelectFileDom = useRef(null);
const File = useCallback(
@@ -15,12 +19,18 @@ export const useSelectFile = (props?: { fileType?: string; multiple?: boolean })
multiple={multiple}
onChange={(e) => {
if (!e.target.files || e.target.files?.length === 0) return;
+ if (e.target.files.length > 10) {
+ return toast({
+ status: 'warning',
+ title: t('file.Select a maximum of 10 files')
+ });
+ }
onSelect(Array.from(e.target.files));
}}
/>
),
- [fileType, multiple]
+ [fileType, multiple, t, toast]
);
const onOpen = useCallback(() => {
diff --git a/client/src/pages/api/plugins/urlFetch.ts b/client/src/pages/api/plugins/urlFetch.ts
index e5d8d81dd..1ff3df6a4 100644
--- a/client/src/pages/api/plugins/urlFetch.ts
+++ b/client/src/pages/api/plugins/urlFetch.ts
@@ -5,12 +5,9 @@ import { JSDOM } from 'jsdom';
import { Readability } from '@mozilla/readability';
import { jsonRes } from '@/service/response';
import { authUser } from '@/service/utils/auth';
+import type { FetchResultItem } from '@/types/plugin';
+import { simpleText } from '@/utils/file';
-type FetchResultItem = {
- url: string;
- title: string;
- content: string;
-};
export type UrlFetchResponse = FetchResultItem[];
const fetchContent = async (req: NextApiRequest, res: NextApiResponse) => {
@@ -38,10 +35,11 @@ const fetchContent = async (req: NextApiRequest, res: NextApiResponse) => {
const reader = new Readability(dom.window.document);
const article = reader.parse();
+ const content = article?.textContent || '';
+ // Join only the parts that exist: interpolating a missing article/title would emit the literal text "undefined".
return {
url,
- title: article?.title || '',
- content: article?.textContent || ''
+ content: simpleText([article?.title, content].filter(Boolean).join('\n'))
};
})
)
diff --git a/client/src/pages/kb/detail/components/DataCard.tsx b/client/src/pages/kb/detail/components/DataCard.tsx
index 4ef1c26ad..d489affb7 100644
--- a/client/src/pages/kb/detail/components/DataCard.tsx
+++ b/client/src/pages/kb/detail/components/DataCard.tsx
@@ -217,7 +217,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
{item.a}
-
+
{item.source?.trim()}
{
- const model = vectorModelList[0]?.model;
+ const model = vectorModelList[0]?.model || 'text-embedding-ada-002';
const unitPrice = vectorModelList[0]?.price || 0.2;
const theme = useTheme();
const router = useRouter();
@@ -52,7 +39,6 @@ const ChunkImport = ({ kbId }: { kbId: string }) => {
const [chunkLen, setChunkLen] = useState(500);
const [showRePreview, setShowRePreview] = useState(false);
- const [selecting, setSelecting] = useState(false);
const [files, setFiles] = useState([]);
const [previewFile, setPreviewFile] = useState();
const [successChunks, setSuccessChunks] = useState(0);
@@ -72,73 +58,9 @@ const ChunkImport = ({ kbId }: { kbId: string }) => {
content: `该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。`
});
- const onSelectFile = useCallback(
- async (files: File[]) => {
- setSelecting(true);
- try {
- let promise = Promise.resolve();
- files.forEach((file) => {
- promise = promise.then(async () => {
- const extension = file?.name?.split('.')?.pop()?.toLowerCase();
- const icon = fileImgs.find((item) => new RegExp(item.reg).test(file.name))?.src;
- const text = await (async () => {
- switch (extension) {
- case 'txt':
- case 'md':
- return readTxtContent(file);
- case 'pdf':
- return readPdfContent(file);
- case 'doc':
- case 'docx':
- return readDocContent(file);
- }
- return '';
- })();
-
- if (icon && text) {
- const splitRes = splitText2Chunks({
- text: text,
- maxLen: chunkLen
- });
-
- setFiles((state) => [
- {
- id: nanoid(),
- filename: file.name,
- text,
- icon,
- ...splitRes
- },
- ...state
- ]);
- }
- });
- });
- await promise;
- } catch (error: any) {
- console.log(error);
- toast({
- title: typeof error === 'string' ? error : '解析文件失败',
- status: 'error'
- });
- }
- setSelecting(false);
- },
- [chunkLen, toast]
- );
-
const { mutate: onclickUpload, isLoading: uploading } = useMutation({
mutationFn: async () => {
- const chunks: { a: string; q: string; source: string }[] = [];
- files.forEach((file) =>
- file.chunks.forEach((chunk) => {
- chunks.push({
- q: chunk,
- a: '',
- source: file.filename
- });
- })
- );
+ const chunks = files.map((file) => file.chunks).flat();
// subsection import
let success = 0;
@@ -177,18 +99,22 @@ const ChunkImport = ({ kbId }: { kbId: string }) => {
const onRePreview = useCallback(async () => {
try {
- const splitRes = files.map((item) =>
- splitText2Chunks({
- text: item.text,
- maxLen: chunkLen
- })
- );
-
setFiles((state) =>
- state.map((file, index) => ({
- ...file,
- ...splitRes[index]
- }))
+ state.map((file) => {
+ const splitRes = splitText2Chunks({
+ text: file.text,
+ maxLen: chunkLen
+ });
+ return {
+ ...file,
+ tokens: splitRes.tokens,
+ chunks: splitRes.chunks.map((chunk) => ({
+ q: chunk,
+ a: '',
+ source: file.filename
+ }))
+ };
+ })
);
setPreviewFile(undefined);
setShowRePreview(false);
@@ -198,7 +124,12 @@ const ChunkImport = ({ kbId }: { kbId: string }) => {
title: getErrText(error, '文本分段异常')
});
}
- }, [chunkLen, files, toast]);
+ }, [chunkLen, toast]);
+
+ const filenameStyles = {
+ className: 'textEllipsis',
+ maxW: '400px'
+ };
return (
@@ -212,8 +143,10 @@ const ChunkImport = ({ kbId }: { kbId: string }) => {
>
{
+ setFiles((state) => files.concat(state));
+ }}
+ chunkLen={chunkLen}
py={emptyFiles ? '100px' : 5}
/>
@@ -241,7 +174,7 @@ const ChunkImport = ({ kbId }: { kbId: string }) => {
onClick={() => setPreviewFile(item)}
>
-
+
{item.filename}
{
pt={[4, 8]}
bg={'myWhite.400'}
>
-
+
{previewFile.filename}
{
) : (
-
- 分段预览({totalChunk}组)
-
+
+
+ 分段预览({totalChunk}组)
+
+ {totalChunk > 100 && (
+
+ 仅展示部分
+
+ )}
+
{files.map((file) =>
- file.chunks.map((item, i) => (
+ file.chunks.slice(0, 50).map((chunk, i) => (
{
_hover={{ ...hoverDeleteStyles }}
>
-
+
# {i + 1}
+
+ {file.filename}
+
{
@@ -417,11 +366,12 @@ const ChunkImport = ({ kbId }: { kbId: string }) => {
whiteSpace={'pre-wrap'}
wordBreak={'break-all'}
contentEditable
- dangerouslySetInnerHTML={{ __html: item }}
+ dangerouslySetInnerHTML={{ __html: chunk.q }}
onBlur={(e) => {
// @ts-ignore
const val = e.target.innerText;
+ /* delete file */
if (val === '') {
setFiles((state) =>
state.map((stateFile) =>
@@ -437,14 +387,16 @@ const ChunkImport = ({ kbId }: { kbId: string }) => {
)
);
} else {
- setFiles((state) =>
- state.map((stateFile) =>
- stateFile.id === file.id
+ // update file
+ setFiles((stateFiles) =>
+ stateFiles.map((stateFile) =>
+ file.id === stateFile.id
? {
- ...file,
- chunks: file.chunks.map((chunk, index) =>
- i === index ? val : chunk
- )
+ ...stateFile,
+ chunks: stateFile.chunks.map((chunk, index) => ({
+ ...chunk,
+ q: i === index ? val : chunk.q
+ }))
}
: stateFile
)
diff --git a/client/src/pages/kb/detail/components/Import/CreateFileModal.tsx b/client/src/pages/kb/detail/components/Import/CreateFileModal.tsx
new file mode 100644
index 000000000..213d18cbb
--- /dev/null
+++ b/client/src/pages/kb/detail/components/Import/CreateFileModal.tsx
@@ -0,0 +1,63 @@
+import React from 'react';
+import { useTranslation } from 'next-i18next';
+import MyModal from '@/components/MyModal';
+import { Box, Input, Textarea, ModalBody, ModalFooter, Button } from '@chakra-ui/react';
+import { useForm } from 'react-hook-form';
+
+const CreateFileModal = ({
+ onClose,
+ onSuccess
+}: {
+ onClose: () => void;
+ onSuccess: (e: { filename: string; content: string }) => void;
+}) => {
+ const { t } = useTranslation();
+ const { register, handleSubmit } = useForm({
+ defaultValues: {
+ filename: '',
+ content: ''
+ }
+ });
+
+ return (
+
+
+
+ 文件名
+
+
+
+ 文件内容
+
+
+
+
+
+
+
+
+ );
+};
+
+export default CreateFileModal;
diff --git a/client/src/pages/kb/detail/components/Import/Csv.tsx b/client/src/pages/kb/detail/components/Import/Csv.tsx
index 81d920866..66c1d1a8f 100644
--- a/client/src/pages/kb/detail/components/Import/Csv.tsx
+++ b/client/src/pages/kb/detail/components/Import/Csv.tsx
@@ -8,28 +8,18 @@ import { getErrText } from '@/utils/tools';
import { vectorModelList } from '@/store/static';
import MyIcon from '@/components/Icon';
import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';
-import { customAlphabet } from 'nanoid';
import { TrainingModeEnum } from '@/constants/plugin';
-import FileSelect from './FileSelect';
+import FileSelect, { type FileItemType } from './FileSelect';
import { useRouter } from 'next/router';
-const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
-import { readCsvContent } from '@/utils/file';
const fileExtension = '.csv';
-type FileItemType = {
- id: string;
- filename: string;
- chunks: { q: string; a: string; source?: string }[];
-};
-
const CsvImport = ({ kbId }: { kbId: string }) => {
const model = vectorModelList[0]?.model;
const theme = useTheme();
const router = useRouter();
const { toast } = useToast();
- const [selecting, setSelecting] = useState(false);
const [files, setFiles] = useState([]);
const [successChunks, setSuccessChunks] = useState(0);
@@ -43,58 +33,9 @@ const CsvImport = ({ kbId }: { kbId: string }) => {
content: `该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。`
});
- const onSelectFile = useCallback(
- async (files: File[]) => {
- setSelecting(true);
- try {
- let promise = Promise.resolve();
- files
- .filter((file) => /csv$/.test(file.name))
- .forEach((file) => {
- promise = promise.then(async () => {
- const { header, data } = await readCsvContent(file);
- if (header[0] !== 'question' || header[1] !== 'answer') {
- throw new Error('csv 文件格式有误,请确保 question 和 answer 两列');
- }
-
- setFiles((state) => [
- {
- id: nanoid(),
- filename: file.name,
- chunks: data.map((item) => ({
- q: item[0],
- a: item[1],
- source: item[2]
- }))
- },
- ...state
- ]);
- });
- });
- await promise;
- } catch (error: any) {
- console.log(error);
- toast({
- title: getErrText(error, '解析文件失败'),
- status: 'error'
- });
- }
- setSelecting(false);
- },
- [toast]
- );
-
const { mutate: onclickUpload, isLoading: uploading } = useMutation({
mutationFn: async () => {
- const chunks: { a: string; q: string; source: string }[] = [];
- files.forEach((file) =>
- file.chunks.forEach((chunk) => {
- chunks.push({
- ...chunk,
- source: chunk.source || file.filename
- });
- })
- );
+ const chunks = files.map((file) => file.chunks).flat();
// subsection import
let success = 0;
@@ -131,6 +72,10 @@ const CsvImport = ({ kbId }: { kbId: string }) => {
}
});
+ const filenameStyles = {
+ className: 'textEllipsis',
+ maxW: '400px'
+ };
return (
{
>
setFiles((state) => files.concat(state))}
+ showUrlFetch={false}
+ showCreateFile={false}
py={emptyFiles ? '100px' : 5}
isCsv
/>
@@ -169,7 +115,7 @@ const CsvImport = ({ kbId }: { kbId: string }) => {
_hover={{ ...hoverDeleteStyles }}
>
-
+
{item.filename}
{
{!emptyFiles && (
-
- 数据预览({totalChunk}组)
-
+
+
+ 分段预览({totalChunk}组)
+
+ {totalChunk > 100 && (
+
+ 仅展示部分
+
+ )}
+
{files.map((file) =>
file.chunks.slice(0, 100).map((item, i) => (
diff --git a/client/src/pages/kb/detail/components/Import/FileSelect.tsx b/client/src/pages/kb/detail/components/Import/FileSelect.tsx
index 15c52c9fc..791308017 100644
--- a/client/src/pages/kb/detail/components/Import/FileSelect.tsx
+++ b/client/src/pages/kb/detail/components/Import/FileSelect.tsx
@@ -2,30 +2,53 @@ import MyIcon from '@/components/Icon';
import { useLoading } from '@/hooks/useLoading';
import { useSelectFile } from '@/hooks/useSelectFile';
import { useToast } from '@/hooks/useToast';
-import { fileDownload } from '@/utils/file';
-import { Box, Flex, Text, type BoxProps } from '@chakra-ui/react';
+import { fileDownload, readCsvContent, simpleText, splitText2Chunks } from '@/utils/file';
+import { Box, Flex, useDisclosure, type BoxProps } from '@chakra-ui/react';
+import { fileImgs } from '@/constants/common';
import { DragEvent, useCallback, useState } from 'react';
import { useTranslation } from 'next-i18next';
+import { readTxtContent, readPdfContent, readDocContent } from '@/utils/file';
+import { customAlphabet } from 'nanoid';
+import dynamic from 'next/dynamic';
+import MyTooltip from '@/components/MyTooltip';
+import { FetchResultItem } from '@/types/plugin';
+const UrlFetchModal = dynamic(() => import('./UrlFetchModal'));
+const CreateFileModal = dynamic(() => import('./CreateFileModal'));
+
+const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
+const csvTemplate = `question,answer,source\n"什么是 laf","laf 是一个云函数开发平台……","laf git doc"\n"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……","sealos git doc"`;
+
+export type FileItemType = {
+ id: string;
+ filename: string;
+ chunks: { q: string; a: string; source?: string }[];
+ text: string;
+ icon: string;
+ tokens: number;
+};
interface Props extends BoxProps {
fileExtension: string;
+ onPushFiles: (files: FileItemType[]) => void;
tipText?: string;
- onSelectFile: (files: File[]) => Promise;
- isLoading?: boolean;
+ chunkLen?: number;
isCsv?: boolean;
+ showUrlFetch?: boolean;
+ showCreateFile?: boolean;
}
const FileSelect = ({
fileExtension,
- onSelectFile,
- isLoading,
+ onPushFiles,
tipText,
+ chunkLen = 500,
isCsv = false,
+ showUrlFetch = true,
+ showCreateFile = true,
...props
}: Props) => {
const { Loading: FileSelectLoading } = useLoading();
const { t } = useTranslation();
- const csvTemplate = `question,answer,source\n"什么是 laf","laf 是一个云函数开发平台……","laf git doc"\n"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……","sealos git doc"`;
const { toast } = useToast();
@@ -35,6 +58,154 @@ const FileSelect = ({
});
const [isDragging, setIsDragging] = useState(false);
+ const [selecting, setSelecting] = useState(false);
+
+ const {
+ isOpen: isOpenUrlFetch,
+ onOpen: onOpenUrlFetch,
+ onClose: onCloseUrlFetch
+ } = useDisclosure();
+ const {
+ isOpen: isOpenCreateFile,
+ onOpen: onOpenCreateFile,
+ onClose: onCloseCreateFile
+ } = useDisclosure();
+
+  /* Read the picked files sequentially and hand the parsed items to onPushFiles. */
+  const onSelectFile = useCallback(
+    async (files: File[]) => {
+      setSelecting(true);
+      try {
+        // Chain one step per file; each step prepends its item, so later files come first in the result.
+        let promise = Promise.resolve<FileItemType[]>([]);
+        files.forEach((file) => {
+          promise = promise.then(async (result) => {
+            const extension = file?.name?.split('.')?.pop()?.toLowerCase();
+
+            /* text file */
+            const icon = fileImgs.find((item) => new RegExp(item.reg).test(file.name))?.src;
+            let text = await (async () => {
+              switch (extension) {
+                case 'txt':
+                case 'md':
+                  return readTxtContent(file);
+                case 'pdf':
+                  return readPdfContent(file);
+                case 'doc':
+                case 'docx':
+                  return readDocContent(file);
+              }
+              return '';
+            })();
+            // No icon entry matches this file name: leave the accumulated result unchanged.
+            if (!icon) return result;
+
+            if (text) {
+              text = simpleText(text);
+              const splitRes = splitText2Chunks({
+                text,
+                maxLen: chunkLen
+              });
+              const fileItem: FileItemType = {
+                id: nanoid(),
+                filename: file.name,
+                icon,
+                text,
+                tokens: splitRes.tokens,
+                chunks: splitRes.chunks.map((chunk) => ({
+                  q: chunk,
+                  a: '',
+                  source: file.name
+                }))
+              };
+              return [fileItem].concat(result);
+            }
+
+            /* csv file */
+            if (extension === 'csv') {
+              const { header, data } = await readCsvContent(file);
+              if (header[0] !== 'question' || header[1] !== 'answer') {
+                throw new Error('csv 文件格式有误,请确保 question 和 answer 两列');
+              }
+              const fileItem: FileItemType = {
+                id: nanoid(),
+                filename: file.name,
+                icon,
+                tokens: 0,
+                text: '',
+                chunks: data.map((item) => ({
+                  q: item[0],
+                  a: item[1],
+                  source: item[2] || file.name
+                }))
+              };
+              return [fileItem].concat(result);
+            }
+            return result;
+          });
+        });
+
+        onPushFiles(await promise);
+      } catch (error: any) {
+        console.log(error);
+        toast({
+          // The CSV header check throws an Error, so show its message instead of only accepting strings.
+          title: typeof error === 'string' ? error : error?.message || '解析文件失败',
+          status: 'error'
+        });
+      }
+      setSelecting(false);
+    },
+    [chunkLen, onPushFiles, toast]
+  );
+ const onUrlFetch = useCallback(
+ (e: FetchResultItem[]) => {
+ const result = e.map(({ url, content }) => {
+ const splitRes = splitText2Chunks({
+ text: content,
+ maxLen: chunkLen
+ });
+ return {
+ id: nanoid(),
+ filename: url,
+ icon: '/imgs/files/url.svg',
+ text: content,
+ tokens: splitRes.tokens,
+ chunks: splitRes.chunks.map((chunk) => ({
+ q: chunk,
+ a: '',
+ source: url
+ }))
+ };
+ });
+ onPushFiles(result);
+ },
+ [chunkLen, onPushFiles]
+ );
+ const onCreateFile = useCallback(
+ ({ filename, content }: { filename: string; content: string }) => {
+ content = simpleText(content);
+ const splitRes = splitText2Chunks({
+ text: content,
+ maxLen: chunkLen
+ });
+ onPushFiles([
+ {
+ id: nanoid(),
+ filename,
+ icon: '/imgs/files/txt.svg',
+ text: content,
+ tokens: splitRes.tokens,
+ chunks: splitRes.chunks.map((chunk) => ({
+ q: chunk,
+ a: '',
+ source: filename
+ }))
+ }
+ ]);
+ },
+ [chunkLen, onPushFiles]
+ );
const handleDragEnter = (e: DragEvent) => {
e.preventDefault();
@@ -46,56 +217,69 @@ const FileSelect = ({
setIsDragging(false);
};
- const handleDrop = useCallback(async (e: DragEvent) => {
- e.preventDefault();
- setIsDragging(false);
+ const handleDrop = useCallback(
+ async (e: DragEvent) => {
+ e.preventDefault();
+ setIsDragging(false);
- const items = e.dataTransfer.items;
- const fileList: File[] = [];
+ const items = e.dataTransfer.items;
+ const fileList: File[] = [];
- if (e.dataTransfer.items.length <= 1) {
- const traverseFileTree = async (item: any) => {
- return new Promise((resolve, reject) => {
- if (item.isFile) {
- item.file((file: File) => {
- fileList.push(file);
- resolve();
- });
- } else if (item.isDirectory) {
- const dirReader = item.createReader();
- dirReader.readEntries(async (entries: any[]) => {
- for (let i = 0; i < entries.length; i++) {
- await traverseFileTree(entries[i]);
- }
- resolve();
- });
+ if (e.dataTransfer.items.length <= 1) {
+ const traverseFileTree = async (item: any) => {
+ return new Promise((resolve, reject) => {
+ if (item.isFile) {
+ item.file((file: File) => {
+ fileList.push(file);
+ resolve();
+ });
+ } else if (item.isDirectory) {
+ const dirReader = item.createReader();
+ dirReader.readEntries(async (entries: any[]) => {
+ for (let i = 0; i < entries.length; i++) {
+ await traverseFileTree(entries[i]);
+ }
+ resolve();
+ });
+ }
+ });
+ };
+
+ for (let i = 0; i < items.length; i++) {
+ const item = items[i].webkitGetAsEntry();
+ if (item) {
+ await traverseFileTree(item);
}
- });
- };
+ }
+ } else {
+ const files = Array.from(e.dataTransfer.files);
+ let isErr = files.some((item) => item.type === '');
+ if (isErr) {
+ return toast({
+ title: t('file.upload error description'),
+ status: 'error'
+ });
+ }
- for (let i = 0; i < items.length; i++) {
- const item = items[i].webkitGetAsEntry();
- if (item) {
- await traverseFileTree(item);
+ for (let i = 0; i < files.length; i++) {
+ fileList.push(files[i]);
}
}
- } else {
- const files = Array.from(e.dataTransfer.files);
- let isErr = files.some((item) => item.type === '');
- if (isErr) {
- return toast({
- title: t('file.upload error description'),
- status: 'error'
- });
- }
- for (let i = 0; i < files.length; i++) {
- fileList.push(files[i]);
- }
+ onSelectFile(fileList);
+ },
+ [onSelectFile, t, toast]
+ );
+
+ const SelectTextStyles: BoxProps = {
+ ml: 1,
+ as: 'span',
+ cursor: 'pointer',
+ color: 'myBlue.700',
+ _hover: {
+ textDecoration: 'underline'
}
-
- onSelectFile(fileList);
- }, []);
+ };
return (
- {t('file.Drag and drop')}
-
- {t('file.select a document')}
-
+ {t('file.Drag and drop')},
+
+
+ {t('file.select a document')}
+
+
+ {showUrlFetch && (
+ <>
+ ,
+
+ {t('file.Fetch Url')}
+
+ >
+ )}
+ {showCreateFile && (
+ <>
+ ,
+
+ {t('file.Create file')}
+
+ >
+ )}
)}
@@ -151,8 +353,10 @@ const FileSelect = ({
{t('file.Click to download CSV template')}
)}
-
+
+ {isOpenUrlFetch && }
+ {isOpenCreateFile && }
);
};
diff --git a/client/src/pages/kb/detail/components/Import/QA.tsx b/client/src/pages/kb/detail/components/Import/QA.tsx
index 5cb1fa32c..51be31ae3 100644
--- a/client/src/pages/kb/detail/components/Import/QA.tsx
+++ b/client/src/pages/kb/detail/components/Import/QA.tsx
@@ -2,7 +2,6 @@ import React, { useState, useCallback, useMemo } from 'react';
import { Box, Flex, Button, useTheme, Image, Input } from '@chakra-ui/react';
import { useToast } from '@/hooks/useToast';
import { useConfirm } from '@/hooks/useConfirm';
-import { readTxtContent, readPdfContent, readDocContent } from '@/utils/file';
import { useMutation } from '@tanstack/react-query';
import { postKbDataFromList } from '@/api/plugins/kb';
import { splitText2Chunks } from '@/utils/file';
@@ -14,24 +13,12 @@ import CloseIcon from '@/components/Icon/close';
import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';
import MyTooltip from '@/components/MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
-import { fileImgs } from '@/constants/common';
-import { customAlphabet } from 'nanoid';
import { TrainingModeEnum } from '@/constants/plugin';
-import FileSelect from './FileSelect';
+import FileSelect, { type FileItemType } from './FileSelect';
import { useRouter } from 'next/router';
-const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
const fileExtension = '.txt, .doc, .docx, .pdf, .md';
-type FileItemType = {
- id: string;
- filename: string;
- text: string;
- icon: string;
- chunks: string[];
- tokens: number;
-};
-
const QAImport = ({ kbId }: { kbId: string }) => {
const model = qaModelList[0]?.model;
const unitPrice = qaModelList[0]?.price || 3;
@@ -40,7 +27,6 @@ const QAImport = ({ kbId }: { kbId: string }) => {
const router = useRouter();
const { toast } = useToast();
- const [selecting, setSelecting] = useState(false);
const [files, setFiles] = useState([]);
const [showRePreview, setShowRePreview] = useState(false);
const [previewFile, setPreviewFile] = useState();
@@ -62,77 +48,13 @@ const QAImport = ({ kbId }: { kbId: string }) => {
content: `该任务无法终止!导入后会自动调用大模型生成问答对,会有一些细节丢失,请确认!如果余额不足,未完成的任务会被暂停。`
});
- const onSelectFile = useCallback(
- async (files: File[]) => {
- setSelecting(true);
- try {
- let promise = Promise.resolve();
- files.forEach((file) => {
- promise = promise.then(async () => {
- const extension = file?.name?.split('.')?.pop()?.toLowerCase();
- const icon = fileImgs.find((item) => new RegExp(item.reg).test(file.name))?.src;
- const text = await (async () => {
- switch (extension) {
- case 'txt':
- case 'md':
- return readTxtContent(file);
- case 'pdf':
- return readPdfContent(file);
- case 'doc':
- case 'docx':
- return readDocContent(file);
- }
- return '';
- })();
-
- if (icon && text) {
- const splitRes = splitText2Chunks({
- text: text,
- maxLen: chunkLen
- });
-
- setFiles((state) => [
- {
- id: nanoid(),
- filename: file.name,
- text,
- icon,
- ...splitRes
- },
- ...state
- ]);
- }
- });
- });
- await promise;
- } catch (error: any) {
- console.log(error);
- toast({
- title: typeof error === 'string' ? error : '解析文件失败',
- status: 'error'
- });
- }
- setSelecting(false);
- },
- [chunkLen, toast]
- );
-
const { mutate: onclickUpload, isLoading: uploading } = useMutation({
mutationFn: async () => {
- const chunks: { a: string; q: string; source: string }[] = [];
- files.forEach((file) =>
- file.chunks.forEach((chunk) => {
- chunks.push({
- q: chunk,
- a: '',
- source: file.filename
- });
- })
- );
+ const chunks = files.map((file) => file.chunks).flat();
// subsection import
let success = 0;
- const step = 500;
+ const step = 300;
for (let i = 0; i < chunks.length; i += step) {
const { insertLen } = await postKbDataFromList({
kbId,
@@ -168,18 +90,22 @@ const QAImport = ({ kbId }: { kbId: string }) => {
const onRePreview = useCallback(async () => {
try {
- const splitRes = files.map((item) =>
- splitText2Chunks({
- text: item.text,
- maxLen: chunkLen
- })
- );
-
setFiles((state) =>
- state.map((file, index) => ({
- ...file,
- ...splitRes[index]
- }))
+ state.map((file) => {
+ const splitRes = splitText2Chunks({
+ text: file.text,
+ maxLen: chunkLen
+ });
+ return {
+ ...file,
+ tokens: splitRes.tokens,
+ chunks: splitRes.chunks.map((chunk) => ({
+ q: chunk,
+ a: '',
+ source: file.filename
+ }))
+ };
+ })
);
setPreviewFile(undefined);
setShowRePreview(false);
@@ -189,7 +115,12 @@ const QAImport = ({ kbId }: { kbId: string }) => {
title: getErrText(error, '文本分段异常')
});
}
- }, [chunkLen, files, toast]);
+ }, [chunkLen, toast]);
+
+ const filenameStyles = {
+ className: 'textEllipsis',
+ maxW: '400px'
+ };
return (
@@ -203,8 +134,10 @@ const QAImport = ({ kbId }: { kbId: string }) => {
>
{
+ setFiles((state) => files.concat(state));
+ }}
+ chunkLen={chunkLen}
py={emptyFiles ? '100px' : 5}
/>
@@ -232,7 +165,7 @@ const QAImport = ({ kbId }: { kbId: string }) => {
onClick={() => setPreviewFile(item)}
>
-
+
{item.filename}
{
pt={[4, 8]}
bg={'myWhite.400'}
>
-
+
{previewFile.filename}
{
) : (
-
- 分段预览({totalChunk}组)
-
+
+
+ 分段预览({totalChunk}组)
+
+ {totalChunk > 100 && (
+
+ 仅展示部分
+
+ )}
+
{files.map((file) =>
- file.chunks.map((item, i) => (
+ file.chunks.slice(0, 30).map((chunk, i) => (
{
# {i + 1}
+
+ {file.filename}
+
{
@@ -397,11 +340,12 @@ const QAImport = ({ kbId }: { kbId: string }) => {
whiteSpace={'pre-wrap'}
wordBreak={'break-all'}
contentEditable
- dangerouslySetInnerHTML={{ __html: item }}
+ dangerouslySetInnerHTML={{ __html: chunk.q }}
onBlur={(e) => {
// @ts-ignore
const val = e.target.innerText;
+ /* delete file */
if (val === '') {
setFiles((state) =>
state.map((stateFile) =>
@@ -417,14 +361,16 @@ const QAImport = ({ kbId }: { kbId: string }) => {
)
);
} else {
- setFiles((state) =>
- state.map((stateFile) =>
- stateFile.id === file.id
+ // update file
+ setFiles((stateFiles) =>
+ stateFiles.map((stateFile) =>
+ file.id === stateFile.id
? {
- ...file,
- chunks: file.chunks.map((chunk, index) =>
- i === index ? val : chunk
- )
+ ...stateFile,
+ chunks: stateFile.chunks.map((chunk, index) => ({
+ ...chunk,
+ q: i === index ? val : chunk.q
+ }))
}
: stateFile
)
diff --git a/client/src/pages/kb/detail/components/Import/UrlFetchModal.tsx b/client/src/pages/kb/detail/components/Import/UrlFetchModal.tsx
new file mode 100644
index 000000000..4d994b905
--- /dev/null
+++ b/client/src/pages/kb/detail/components/Import/UrlFetchModal.tsx
@@ -0,0 +1,67 @@
+import React, { useRef } from 'react';
+import { useTranslation } from 'next-i18next';
+import MyModal from '@/components/MyModal';
+import { Box, Button, ModalBody, ModalFooter, Textarea } from '@chakra-ui/react';
+import type { FetchResultItem } from '@/types/plugin';
+import { useRequest } from '@/hooks/useRequest';
+import { fetchUrls } from '@/api/plugins/common';
+
+const UrlFetchModal = ({
+ onClose,
+ onSuccess
+}: {
+ onClose: () => void;
+ onSuccess: (e: FetchResultItem[]) => void;
+}) => {
+ const { t } = useTranslation();
+ const Dom = useRef(null);
+
+ const { mutate, isLoading } = useRequest({
+ mutationFn: async () => {
+ const val = Dom.current?.value || '';
+ const urls = val.split('\n').filter((e) => e);
+ const res = await fetchUrls(urls);
+
+ onSuccess(res);
+ onClose();
+ },
+ errorToast: '获取链接失败'
+ });
+
+ return (
+
+ {t('file.Fetch Url')}
+
+ 目前仅支持读取静态链接,请注意检查结果
+
+ >
+ }
+ top={'15vh'}
+ isOpen
+ onClose={onClose}
+ w={'600px'}
+ >
+
+
+
+
+
+
+
+
+ );
+};
+
+export default UrlFetchModal;
diff --git a/client/src/types/plugin.d.ts b/client/src/types/plugin.d.ts
index 5d38733f5..f43fb795b 100644
--- a/client/src/types/plugin.d.ts
+++ b/client/src/types/plugin.d.ts
@@ -28,3 +28,8 @@ export type KbTestItemType = {
time: Date;
results: (KbDataItemType & { score: number })[];
};
+
+export type FetchResultItem = {
+ url: string;
+ content: string;
+};
diff --git a/client/src/utils/file.ts b/client/src/utils/file.ts
index c5f82a8c9..57ee3dac3 100644
--- a/client/src/utils/file.ts
+++ b/client/src/utils/file.ts
@@ -151,7 +151,7 @@ export const splitText2Chunks = ({ text, maxLen }: { text: string; maxLen: numbe
const overlapLen = Math.floor(maxLen * 0.3); // Overlap length
try {
- const splitTexts = text.split(/(?<=[。!?.!?])/g);
+ const splitTexts = text.split(/(?<=[。!?;.!?;])/g);
const chunks: string[] = [];
let preChunk = '';
@@ -268,3 +268,11 @@ export const compressImg = ({
reject('压缩图片异常');
};
});
+
+/* Simplify text: strip whitespace between Chinese characters, squeeze repeated newlines, then squeeze whitespace runs.
+ * The lookahead leaves the right-hand character unconsumed, so chains such as "中 文 字" collapse completely. */
+export const simpleText = (text: string) =>
+  text
+    .replace(/([\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])/g, '$1')
+    .replace(/\n{2,}/g, '\n')
+    .replace(/\s{2,}/g, ' ');
diff --git a/docSite/docs/develop/deploy/docker.md b/docSite/docs/develop/deploy/docker.md
index 001488f59..6dd1f52c4 100644
--- a/docSite/docs/develop/deploy/docker.md
+++ b/docSite/docs/develop/deploy/docker.md
@@ -69,8 +69,8 @@ services:
- ./mongo/data:/data/db
fastgpt:
container_name: fastgpt
- # image: c121914yu/fast-gpt:latest # docker hub
- image: ghcr.io/labring/fastgpt:latest # 阿里云
+ # image: ghcr.io/labring/fastgpt:latest # GitHub Container Registry
+ image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:latest # 阿里云
ports:
- 3000:3000
networks:
diff --git a/docSite/docs/develop/update/41init.md b/docSite/docs/develop/update/41init.md
index b7d21d021..8aa9bad5b 100644
--- a/docSite/docs/develop/update/41init.md
+++ b/docSite/docs/develop/update/41init.md
@@ -15,6 +15,6 @@
## 执行初始化 API
-部署新版项目,并发起 3 个 HTTP 请求(记得携带 headers.rootkey,这个值是环境变量里的)
+部署新版项目,并发起 1 个 HTTP 请求(记得携带 headers.rootkey,这个值是环境变量里的)
https://xxxxx/api/admin/initChatItem
diff --git a/docSite/i18n/zh-Hans/docusaurus-plugin-content-docs/current/develop/update/41init.md b/docSite/i18n/zh-Hans/docusaurus-plugin-content-docs/current/develop/update/41init.md
index b7d21d021..8aa9bad5b 100644
--- a/docSite/i18n/zh-Hans/docusaurus-plugin-content-docs/current/develop/update/41init.md
+++ b/docSite/i18n/zh-Hans/docusaurus-plugin-content-docs/current/develop/update/41init.md
@@ -15,6 +15,6 @@
## 执行初始化 API
-部署新版项目,并发起 3 个 HTTP 请求(记得携带 headers.rootkey,这个值是环境变量里的)
+部署新版项目,并发起 1 个 HTTP 请求(记得携带 headers.rootkey,这个值是环境变量里的)
https://xxxxx/api/admin/initChatItem