diff --git a/docs/deploy/fastgpt/pg/init.sql b/docs/deploy/fastgpt/pg/init.sql index bbf64f3cf..e9de5f0df 100644 --- a/docs/deploy/fastgpt/pg/init.sql +++ b/docs/deploy/fastgpt/pg/init.sql @@ -8,8 +8,9 @@ CREATE TABLE IF NOT EXISTS modelData ( vector VECTOR(1536) NOT NULL, user_id VARCHAR(50) NOT NULL, kb_id VARCHAR(50) NOT NULL, + source VARCHAR(100), q TEXT NOT NULL, - a TEXT NOT NULL + a TEXT NOT NULL, ); -- 索引设置,按需取 -- CREATE INDEX IF NOT EXISTS modelData_userId_index ON modelData USING HASH (user_id); diff --git a/src/components/Avatar/index.tsx b/src/components/Avatar/index.tsx index af9ef3540..acf426d03 100644 --- a/src/components/Avatar/index.tsx +++ b/src/components/Avatar/index.tsx @@ -4,7 +4,6 @@ import type { ImageProps } from '@chakra-ui/react'; import { LOGO_ICON } from '@/constants/chat'; const Avatar = ({ w = '30px', ...props }: ImageProps) => { - console.log(props.src); return ( { const text = item.q + item.a; @@ -79,11 +77,12 @@ export async function pushDataToKb({ // 数据库去重 const insertData = ( await Promise.allSettled( - filterData.map(async ({ q, a = '' }) => { + filterData.map(async ({ q, a = '', source }) => { if (mode !== TrainingModeEnum.index) { return Promise.resolve({ q, - a + a, + source }); } @@ -112,19 +111,21 @@ export async function pushDataToKb({ } return Promise.resolve({ q, - a + a, + source }); }) ) ) .filter((item) => item.status === 'fulfilled') - .map<{ q: string; a: string }>((item: any) => item.value); + .map((item: any) => item.value); // 插入记录 await TrainingData.insertMany( insertData.map((item) => ({ q: item.q, a: item.a, + source: item.source, userId, kbId, mode, diff --git a/src/pages/api/openapi/kb/updateData.ts b/src/pages/api/openapi/kb/updateData.ts index 65a5724b5..ecb134dd6 100644 --- a/src/pages/api/openapi/kb/updateData.ts +++ b/src/pages/api/openapi/kb/updateData.ts @@ -32,6 +32,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex await PgClient.update('modelData', { where: [['id', dataId], 'AND', ['user_id', userId]], values: [ + { key: 'source', value: '手动修改' }, { key: 'a', value: a.replace(/'/g, '"') }, ...(q ? [ diff --git a/src/pages/api/plugins/kb/data/getDataById.ts b/src/pages/api/plugins/kb/data/getDataById.ts index 1c36eb85e..599ae04e2 100644 --- a/src/pages/api/plugins/kb/data/getDataById.ts +++ b/src/pages/api/plugins/kb/data/getDataById.ts @@ -3,7 +3,7 @@ import { jsonRes } from '@/service/response'; import { connectToDatabase } from '@/service/mongo'; import { authUser } from '@/service/utils/auth'; import { PgClient } from '@/service/pg'; -import type { PgKBDataItemType } from '@/types/pg'; +import type { KbDataItemType } from '@/types/plugin'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -21,8 +21,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< const where: any = [['user_id', userId], 'AND', ['id', dataId]]; - const searchRes = await PgClient.select('modelData', { - fields: ['id', 'q', 'a'], + const searchRes = await PgClient.select('modelData', { + fields: ['id', 'q', 'a', 'source'], where, limit: 1 }); diff --git a/src/pages/api/plugins/kb/data/getDataList.ts b/src/pages/api/plugins/kb/data/getDataList.ts index 8edb9855d..caa408cb3 100644 --- a/src/pages/api/plugins/kb/data/getDataList.ts +++ b/src/pages/api/plugins/kb/data/getDataList.ts @@ -3,7 +3,7 @@ import { jsonRes } from '@/service/response'; import { connectToDatabase } from '@/service/mongo'; import { authUser } from '@/service/utils/auth'; import { PgClient } from '@/service/pg'; -import type { PgKBDataItemType } from '@/types/pg'; +import type { KbDataItemType } from '@/types/plugin'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -31,11 +31,16 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< ['user_id', userId], 'AND', ['kb_id', kbId], - ...(searchText ? ['AND', `(q LIKE '%${searchText}%' OR a LIKE '%${searchText}%')`] : []) + ...(searchText + ? [ + 'AND', + `(q LIKE '%${searchText}%' OR a LIKE '%${searchText}%' OR source LIKE '%${searchText}%')` + ] + : []) ]; - const searchRes = await PgClient.select('modelData', { - fields: ['id', 'q', 'a'], + const searchRes = await PgClient.select('modelData', { + fields: ['id', 'q', 'a', 'source'], where, order: [{ field: 'id', mode: 'DESC' }], limit: pageSize, diff --git a/src/pages/kb/components/DataCard.tsx b/src/pages/kb/components/DataCard.tsx index 6064a3b26..c6082b99a 100644 --- a/src/pages/kb/components/DataCard.tsx +++ b/src/pages/kb/components/DataCard.tsx @@ -1,4 +1,4 @@ -import React, { useCallback, useState, useRef } from 'react'; +import React, { useCallback, useState, useRef, useEffect } from 'react'; import { Box, TableContainer, @@ -56,7 +56,7 @@ const DataCard = ({ kbId }: { kbId: string }) => { const { toast } = useToast(); const { - data: modelDataList, + data: kbDataList, isLoading, Pagination, total, @@ -72,11 +72,6 @@ const DataCard = ({ kbId }: { kbId: string }) => { defaultRequest: false }); - useQuery(['getKbData', kbId], () => { - getData(1); - return null; - }); - const [editInputData, setEditInputData] = useState(); const { @@ -101,20 +96,14 @@ const DataCard = ({ kbId }: { kbId: string }) => { ); const refetchData = useCallback( - (num = 1) => { + (num = pageNum) => { getData(num); refetch(); return null; }, - [getData, refetch] + [getData, pageNum, refetch] ); - // interval get data - useQuery(['refetchData'], () => refetchData(pageNum), { - refetchInterval: 5000, - enabled: qaListLen > 0 || vectorListLen > 0 - }); - // get al data and export csv const { mutate: onclickExport, isLoading: isLoadingExport = false } = useMutation({ mutationFn: () => getExportDataList(kbId), @@ -148,6 +137,17 @@ const DataCard = ({ kbId }: { kbId: string }) => { } }); + // interval get data + useQuery(['refetchData'], () => refetchData(1), { + refetchInterval: 5000, + enabled: qaListLen > 0 || vectorListLen > 0 + }); + useQuery(['getKbData', kbId], () => { + setSearchText(''); + getData(1); + return null; + }); + return ( @@ -239,18 +239,22 @@ const DataCard = ({ kbId }: { kbId: string }) => { 补充知识 + 来源 操作 - {modelDataList.map((item) => ( - + {kbDataList.map((item) => ( + {item.q} {item.a || '-'} + + {item.source?.trim() || '-'} + ([]); + const [fileName, setFileName] = useState(''); const [successData, setSuccessData] = useState(0); const { openConfirm, ConfirmChild } = useConfirm({ content: '确认导入该数据集?' @@ -46,6 +47,7 @@ const SelectJsonModal = ({ async (e: File[]) => { const file = e[0]; setSelecting(true); + setFileName(file.name); try { const { header, data } = await readCsvContent(file); if (header[0] !== 'question' || header[1] !== 'answer') { @@ -75,11 +77,14 @@ const SelectJsonModal = ({ let success = 0; // subsection import - const step = 50; + const step = 100; for (let i = 0; i < fileData.length; i += step) { const { insertLen } = await postKbDataFromList({ kbId, - data: fileData.slice(i, i + step), + data: fileData.slice(i, i + step).map((item) => ({ + ...item, + source: fileName + })), mode: TrainingModeEnum.index }); success += insertLen || 0; @@ -129,13 +134,14 @@ const SelectJsonModal = ({ > 点击下载csv模板 - + - - 一共 {fileData.length} 组数据(下面最多展示100组) - + + 【{fileName}】一共有 {fileData.length} 组数据(下面最多展示100组) + + {fileData.slice(0, 100).map((item, index) => ( diff --git a/src/pages/kb/components/SelectFileModal.tsx b/src/pages/kb/components/SelectFileModal.tsx index fb103be73..268594837 100644 --- a/src/pages/kb/components/SelectFileModal.tsx +++ b/src/pages/kb/components/SelectFileModal.tsx @@ -1,4 +1,4 @@ -import React, { useState, useCallback, useMemo } from 'react'; +import React, { useState, useCallback } from 'react'; import { Box, Flex, @@ -54,15 +54,17 @@ const SelectFileModal = ({ const [prompt, setPrompt] = useState(''); const { File, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true }); const [mode, setMode] = useState<`${TrainingModeEnum}`>(TrainingModeEnum.index); - const [fileTextArr, setFileTextArr] = useState(['']); + const [files, setFiles] = useState<{ filename: string; text: string }[]>([ + { filename: '文本1', text: '' } + ]); const [splitRes, setSplitRes] = useState<{ tokens: number; - chunks: string[]; + chunks: { filename: string; value: string }[]; successChunks: number; }>({ tokens: 0, - chunks: [], - successChunks: 0 + successChunks: 0, + chunks: [] }); const { openConfirm, ConfirmChild } = useConfirm({ content: `确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,未完成的任务会被直接清除。一共 ${ @@ -78,21 +80,21 @@ const SelectFileModal = ({ files.forEach((file) => { promise = promise.then(async () => { const extension = file?.name?.split('.')?.pop()?.toLowerCase(); - let text = ''; - switch (extension) { - case 'txt': - case 'md': - text = await readTxtContent(file); - break; - case 'pdf': - text = await readPdfContent(file); - break; - case 'doc': - case 'docx': - text = await readDocContent(file); - break; - } - text && setFileTextArr((state) => [text].concat(state)); + const text = await (async () => { + switch (extension) { + case 'txt': + case 'md': + return readTxtContent(file); + case 'pdf': + return readPdfContent(file); + case 'doc': + case 'docx': + return readDocContent(file); + } + return ''; + })(); + + text && setFiles((state) => [{ filename: file.name, text }].concat(state)); return; }); }); @@ -115,11 +117,13 @@ const SelectFileModal = ({ // subsection import let success = 0; - const step = 50; + const step = 100; for (let i = 0; i < splitRes.chunks.length; i += step) { const { insertLen } = await postKbDataFromList({ kbId, - data: splitRes.chunks.slice(i, i + step).map((text) => ({ q: text, a: '' })), + data: splitRes.chunks + .slice(i, i + step) + .map((item) => ({ q: item.value, a: '', source: item.filename })), prompt: `下面是"${prompt || '一段长文本'}"`, mode }); @@ -149,26 +153,32 @@ const SelectFileModal = ({ const onclickImport = useCallback(async () => { setBtnLoading(true); try { - let promise = Promise.resolve(); - - const splitRes = await Promise.all( - fileTextArr - .filter((item) => item) - .map((item) => - splitText_token({ - text: item, - ...modeMap[mode] - }) - ) - ); + const splitRes = files + .map((item) => + splitText_token({ + text: item.text, + ...modeMap[mode] + }) + ) + .map((item, i) => ({ + ...item, + filename: files[i].filename + })) + .filter((item) => item.tokens > 0); setSplitRes({ tokens: splitRes.reduce((sum, item) => sum + item.tokens, 0), - chunks: splitRes.map((item) => item.chunks).flat(), + chunks: splitRes + .map((item) => + item.chunks.map((chunk) => ({ + filename: item.filename, + value: chunk + })) + ) + .flat(), successChunks: 0 }); - await promise; openConfirm(mutate)(); } catch (error) { toast({ @@ -177,7 +187,7 @@ const SelectFileModal = ({ }); } setBtnLoading(false); - }, [fileTextArr, mode, mutate, openConfirm, toast]); + }, [files, mode, mutate, openConfirm, toast]); return ( @@ -204,7 +214,7 @@ const SelectFileModal = ({ > 支持 {fileExtension} 文件。Gpt会自动对文本进行 QA 拆分,需要较长训练时间,拆分需要消耗 - tokens,账号余额不足时,未拆分的数据会被删除。一个{fileTextArr.length}个文本。 + tokens,账号余额不足时,未拆分的数据会被删除。一个{files.length}个文本。 {/* 拆分模式 */} @@ -235,26 +245,26 @@ const SelectFileModal = ({ )} {/* 文本内容 */} - {fileTextArr.slice(0, 100).map((item, i) => ( + {files.slice(0, 100).map((item, i) => ( - 文本{i + 1} + {item.filename}