diff --git a/.vscode/settings.json b/.vscode/settings.json index 141c85306..eaf1fd95f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,7 +2,7 @@ "editor.formatOnSave": true, "editor.mouseWheelZoom": true, "typescript.tsdk": "node_modules/typescript/lib", - "editor.defaultFormatter": "esbenp.prettier-vscode", + "prettier.prettierPath": "./node_modules/prettier", "i18n-ally.localesPaths": [ "projects/app/public/locales" ], diff --git a/docSite/README.md b/docSite/README.md index 21668d49f..a3b8ba755 100644 --- a/docSite/README.md +++ b/docSite/README.md @@ -3,7 +3,7 @@ ## 本地运行 1. 安装 go 语言环境。 -2. 安装 hugo。 [二进制下载](https://github.com/gohugoio/hugo/releases/tag/v0.117.0) +2. 安装 hugo。 [二进制下载](https://github.com/gohugoio/hugo/releases/tag/v0.117.0),注意需要安装 extended 版本。 3. cd docSite 4. hugo serve 5. 访问 http://localhost:1313 diff --git a/docSite/content/docs/development/configuration.md b/docSite/content/docs/development/configuration.md index e238f4c38..325996e54 100644 --- a/docSite/content/docs/development/configuration.md +++ b/docSite/content/docs/development/configuration.md @@ -84,6 +84,14 @@ weight: 520 "maxToken": 16000, "price": 0, "prompt": "" + }, + "QGModel": { // 生成下一步指引模型 + "model": "gpt-3.5-turbo", + "name": "GPT35-4k", + "maxToken": 4000, + "price": 0, + "prompt": "", + "functionCall": false } } ``` diff --git a/docSite/content/docs/development/design/_index.md b/docSite/content/docs/development/design/_index.md new file mode 100644 index 000000000..b0564c0d7 --- /dev/null +++ b/docSite/content/docs/development/design/_index.md @@ -0,0 +1,8 @@ +--- +weight: 540 +title: "设计方案" +description: "FastGPT 部分设计方案" +icon: public +draft: false +images: [] +--- \ No newline at end of file diff --git a/docSite/content/docs/development/design/dataset.md b/docSite/content/docs/development/design/dataset.md new file mode 100644 index 000000000..bf731aab6 --- /dev/null +++ b/docSite/content/docs/development/design/dataset.md @@ -0,0 +1,25 @@ +--- 
+weight: 541 +title: "数据集" +description: "FastGPT 数据集中文件与数据的设计方案" +icon: dataset +draft: false +images: [] +--- + +## 文件与数据的关系 + +在 FastGPT 中,文件会通过 MongoDB 的 FS 存储,而具体的数据会通过 PostgreSQL 存储,PG 中的数据会有一列 file_id,关联对应的文件。考虑到旧版本的兼容,以及手动输入、标注数据等,我们给 file_id 增加了一些特殊的值,如下: + +- manual: 手动输入 +- mark: 手动标注的数据 + +注意,file_id 仅在插入数据时会写入,变更时无法修改。 + +## 文件导入流程 + +1. 上传文件到 MongoDB 的 FS 中,获取 file_id,此时文件标记为 `unused` 状态 +2. 浏览器解析文件,获取对应的文本和 chunk +3. 给每个 chunk 打上 file_id +4. 点击上传数据:将文件的状态改为 `used`,并将数据推送到 mongo `training` 表中等待训练 +5. 由训练线程从 mongo 中取数据,并在获取向量后插入到 pg。 \ No newline at end of file diff --git a/docSite/content/docs/installation/upgrading/447.md b/docSite/content/docs/installation/upgrading/447.md new file mode 100644 index 000000000..8160cbef0 --- /dev/null +++ b/docSite/content/docs/installation/upgrading/447.md @@ -0,0 +1,29 @@ +--- +title: 'V4.4.7' +description: 'FastGPT V4.4.7 更新(需执行升级脚本)' +icon: 'upgrade' +draft: false +toc: true +weight: 840 +--- + +## 执行初始化 API + +发起 1 个 HTTP 请求({{rootkey}} 替换成环境变量里的`rootkey`,{{host}}替换成自己域名) + +1. https://xxxxx/api/admin/initv447 + +```bash +curl --location --request POST 'https://{{host}}/api/admin/initv447' \ +--header 'rootkey: {{rootkey}}' \ +--header 'Content-Type: application/json' +``` + +初始化 pg 索引以及将 file_id 中空对象转成 manual 对象。如果数据多,可能需要较长时间,可以通过日志查看进度。 + +## 功能介绍 + +### Fast GPT V4.4.7 + +1. 优化了数据库文件 crud。 +2. 
// ---- packages/common/tools/str.ts ----

/**
 * Pattern used to recognise link-like strings: an optional `http`/`https`
 * scheme followed by `://`, or a `www.` prefix, or a leading `/`, followed by
 * at least two further characters and no whitespace.
 */
const LINK_PATTERN = /^((http|https)?:\/\/|www\.|\/)[^\s/$.?#].[^\s]*$/i;

/**
 * Report whether `str` looks like a link.
 *
 * @param str - Candidate string; `undefined` and `''` are never links.
 * @returns `true` when `str` matches {@link LINK_PATTERN}, otherwise `false`.
 */
export function strIsLink(str?: string) {
  if (!str) return false;
  return LINK_PATTERN.test(str);
}

// ---- packages/common/tools/file.ts (imports strIsLink from './str' in the repository) ----

/** `suffix` value of the catch-all entry that must stay last in `fileImgs`. */
const CATCH_ALL_SUFFIX = '.';

/**
 * Icon lookup table. `suffix` is a regular-expression fragment describing a
 * file extension (without the dot); the final `'.'` entry is the catch-all
 * used for any non-empty name that matched nothing else.
 */
export const fileImgs = [
  { suffix: 'pdf', src: '/imgs/files/pdf.svg' },
  { suffix: 'csv', src: '/imgs/files/csv.svg' },
  // Word documents: `.doc` and `.docx`.
  { suffix: '(doc|docx)', src: '/imgs/files/doc.svg' },
  { suffix: 'txt', src: '/imgs/files/txt.svg' },
  { suffix: 'md', src: '/imgs/files/markdown.svg' },
  { suffix: '.', src: '/imgs/files/file.svg' }
];

/**
 * Pick the icon for a file based on its extension.
 *
 * Each table suffix is anchored to the end of the name and must be preceded
 * by a dot, so `pdf-summary.md` resolves to the markdown icon rather than the
 * PDF one. Matching is case-insensitive.
 *
 * @param name - File name; defaults to `''`.
 * @returns The icon path, the generic file icon for unknown extensions, or
 *          `undefined` when `name` is empty.
 */
export function getFileIcon(name = ''): string | undefined {
  return fileImgs.find(({ suffix }) => {
    // The catch-all keeps its literal "any character" meaning; every other
    // entry is matched as a real extension.
    const pattern = suffix === CATCH_ALL_SUFFIX ? suffix : `\\.${suffix}$`;
    return new RegExp(pattern, 'i').test(name);
  })?.src;
}

/**
 * Pick the icon for a "special" dataset file id.
 *
 * @param name - A file id: `'manual'`, `'mark'`, or a link.
 * @returns The matching icon path, or `undefined` when `name` is none of the
 *          special ids (callers fall back to {@link getFileIcon}).
 */
export function getSpecialFileIcon(name = ''): string | undefined {
  if (name === 'manual') {
    return '/imgs/files/manual.svg';
  }
  if (name === 'mark') {
    return '/imgs/files/mark.svg';
  }
  if (strIsLink(name)) {
    return '/imgs/files/link.svg';
  }
  return undefined;
}

// ---- packages/core/dataset/constant.ts ----

/** Reserved `file_id` values for dataset rows that do not belong to an uploaded file. */
export enum DatasetSpecialIdEnum {
  manual = 'manual',
  mark = 'mark'
}

/** i18n keys for the display name and source name of every reserved id. */
export const datasetSpecialIdMap = {
  [DatasetSpecialIdEnum.manual]: {
    name: 'kb.Manual Data',
    sourceName: 'kb.Manual Input'
  },
  [DatasetSpecialIdEnum.mark]: {
    name: 'kb.Mark Data',
    sourceName: 'kb.Manual Mark'
  }
};

/** Reserved ids as a plain string list, convenient for `includes` checks. */
export const datasetSpecialIds: string[] = [DatasetSpecialIdEnum.manual, DatasetSpecialIdEnum.mark];
b/packages/core/dataset/utils.ts new file mode 100644 index 000000000..d950d7d51 --- /dev/null +++ b/packages/core/dataset/utils.ts @@ -0,0 +1,8 @@ +import { datasetSpecialIds } from './constant'; +import { strIsLink } from '@fastgpt/common/tools/str'; + +export function isSpecialFileId(id: string) { + if (datasetSpecialIds.includes(id)) return true; + if (strIsLink(id)) return true; + return false; +} diff --git a/packages/core/package.json b/packages/core/package.json index 46673ad04..0329f7d68 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -3,7 +3,9 @@ "version": "1.0.0", "dependencies": { "openai": "^3.3.0", - "tunnel": "^0.0.6" + "tunnel": "^0.0.6", + "@fastgpt/common": "workspace:*", + "@fastgpt/support": "workspace:*" }, "devDependencies": { "@types/tunnel": "^0.0.4" diff --git a/packages/support/package.json b/packages/support/package.json index 583205a45..d3835b359 100644 --- a/packages/support/package.json +++ b/packages/support/package.json @@ -1,4 +1,7 @@ { "name": "@fastgpt/support", - "version": "1.0.0" + "version": "1.0.0", + "dependencies": { + "@fastgpt/common": "workspace:*" + } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ddcc1c3ca..6d4511557 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -31,6 +31,12 @@ importers: packages/core: dependencies: + '@fastgpt/common': + specifier: workspace:* + version: link:../common + '@fastgpt/support': + specifier: workspace:* + version: link:../support openai: specifier: ^3.3.0 version: registry.npmmirror.com/openai@3.3.0 @@ -42,7 +48,11 @@ importers: specifier: ^0.0.4 version: registry.npmmirror.com/@types/tunnel@0.0.4 - packages/support: {} + packages/support: + dependencies: + '@fastgpt/common': + specifier: workspace:* + version: link:../common projects/app: dependencies: diff --git a/projects/app/package.json b/projects/app/package.json index bb9af3626..f3c8dd820 100644 --- a/projects/app/package.json +++ b/projects/app/package.json @@ -1,6 +1,6 @@ { "name": "app", 
- "version": "4.4.6", + "version": "4.4.7", "private": false, "scripts": { "dev": "next dev", diff --git a/projects/app/public/docs/chatProblem.md b/projects/app/public/docs/chatProblem.md index 24036cca0..99256c060 100644 --- a/projects/app/public/docs/chatProblem.md +++ b/projects/app/public/docs/chatProblem.md @@ -9,6 +9,6 @@ - [计费规则](https://doc.fastgpt.run/docs/pricing/) **其他问题** -| 交流群 | 小助手 | -| ----------------------- | -------------------- | -| ![](https://otnvvf-imgs.oss.laf.run/wxqun300.jpg) | ![](https://otnvvf-imgs.oss.laf.run/wx300.jpg) | +| 添加小助手进入交流群 | +| ----------------------- | +| ![](https://otnvvf-imgs.oss.laf.run/wx300.jpg) | diff --git a/projects/app/public/imgs/files/link.svg b/projects/app/public/imgs/files/link.svg new file mode 100644 index 000000000..68534ba48 --- /dev/null +++ b/projects/app/public/imgs/files/link.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/projects/app/public/imgs/files/manual.svg b/projects/app/public/imgs/files/manual.svg new file mode 100644 index 000000000..fa8b3106a --- /dev/null +++ b/projects/app/public/imgs/files/manual.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/projects/app/public/imgs/files/mark.svg b/projects/app/public/imgs/files/mark.svg new file mode 100644 index 000000000..8c86e0099 --- /dev/null +++ b/projects/app/public/imgs/files/mark.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/projects/app/public/locales/en/common.json b/projects/app/public/locales/en/common.json index 026678635..eb4811079 100644 --- a/projects/app/public/locales/en/common.json +++ b/projects/app/public/locales/en/common.json @@ -127,6 +127,8 @@ "Output": "Output", "Password inconsistency": "Password inconsistency", "Rename": "Rename", + "Rename Failed": "Rename Failed", + "Rename Success": "Rename Success", "Search": "Search", "Status": "Status", "Update Successful": "Update Successful", @@ -214,10 +216,14 @@ "Filename": "Filename", "Files": "{{total}} Files", "Folder Name": "Input 
folder name", + "Insert Data": "Insert", + "Manual Data": "Manual Data", + "Manual Input": "Manual Input", + "Manual Mark": "Manual Mark", + "Mark Data": "Mark Data", "Move Failed": "Move Failed", "My Dataset": "My Dataset", "No Folder": "No Folder", - "Other Data": "Other Data", "Select Dataset": "Select Dataset", "Select Folder": "Enter folder", "Upload Time": "Upload Time", diff --git a/projects/app/public/locales/zh/common.json b/projects/app/public/locales/zh/common.json index 102ac7a58..2c12ca555 100644 --- a/projects/app/public/locales/zh/common.json +++ b/projects/app/public/locales/zh/common.json @@ -127,6 +127,8 @@ "Output": "输出", "Password inconsistency": "两次密码不一致", "Rename": "重命名", + "Rename Failed": "重命名失败", + "Rename Success": "重命名成功", "Search": "搜索", "Status": "状态", "Update Successful": "更新成功", @@ -214,10 +216,14 @@ "Filename": "文件名", "Files": "文件: {{total}}个", "Folder Name": "输入文件夹名称", + "Insert Data": "插入", + "Manual Data": "手动录入", + "Manual Input": "手动录入", + "Manual Mark": "手动标注", + "Mark Data": "标注数据", "Move Failed": "移动出现错误~", "My Dataset": "我的知识库", "No Folder": "没有子目录了~", - "Other Data": "其他数据", "Select Dataset": "选择该知识库", "Select Folder": "进入文件夹", "Upload Time": "上传时间", diff --git a/projects/app/src/components/ChatBox/ResponseTags.tsx b/projects/app/src/components/ChatBox/ResponseTags.tsx index 8f2f3194b..f06a172b6 100644 --- a/projects/app/src/components/ChatBox/ResponseTags.tsx +++ b/projects/app/src/components/ChatBox/ResponseTags.tsx @@ -36,7 +36,8 @@ const ResponseTags = ({ responseData = [] }: { responseData?: ChatHistoryItemRes quoteList: responseData .filter((item) => item.moduleType === FlowModuleTypeEnum.chatNode) .map((item) => item.quoteList) - .flat(), + .flat() + .filter((item) => item) as QuoteItemType[], historyPreview: chatData?.historyPreview, runningTime: +responseData.reduce((sum, item) => sum + (item.runningTime || 0), 0).toFixed(2) }; diff --git a/projects/app/src/components/ChatBox/index.tsx 
b/projects/app/src/components/ChatBox/index.tsx index b2e75e7d0..aa674f733 100644 --- a/projects/app/src/components/ChatBox/index.tsx +++ b/projects/app/src/components/ChatBox/index.tsx @@ -63,6 +63,7 @@ import styles from './index.module.scss'; import Script from 'next/script'; import { postQuestionGuide } from '@/api/core/ai/agent/api'; import { splitGuideModule } from './utils'; +import { DatasetSpecialIdEnum } from '@fastgpt/core/dataset/constant'; const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 24); @@ -511,6 +512,12 @@ const ChatBox = ( // add guide text listener useEffect(() => { + const windowMessage = ({ data }: MessageEvent<{ type: 'sendPrompt'; text: string }>) => { + if (data?.type === 'sendPrompt' && data?.text) { + handleSubmit((item) => sendPrompt(item, data.text))(); + } + }; + window.addEventListener('message', windowMessage); event.on('guideClick', ({ text }: { text: string }) => { if (!text) return; handleSubmit((data) => sendPrompt(data, text))(); @@ -518,6 +525,7 @@ const ChatBox = ( return () => { event.off('guideClick'); + window.removeEventListener('message', windowMessage); }; }, [handleSubmit, sendPrompt]); @@ -995,7 +1003,8 @@ const ChatBox = ( defaultValues={{ dataId: adminMarkData.dataId, q: adminMarkData.q, - a: adminMarkData.a + a: adminMarkData.a, + file_id: DatasetSpecialIdEnum.mark }} /> )} diff --git a/projects/app/src/constants/common.ts b/projects/app/src/constants/common.ts index fd076d299..4fefa5268 100644 --- a/projects/app/src/constants/common.ts +++ b/projects/app/src/constants/common.ts @@ -3,15 +3,6 @@ export enum UserAuthTypeEnum { findPassword = 'findPassword' } -export const fileImgs = [ - { suffix: 'pdf', src: '/imgs/files/pdf.svg' }, - { suffix: 'csv', src: '/imgs/files/csv.svg' }, - { suffix: '(doc|docs)', src: '/imgs/files/doc.svg' }, - { suffix: 'txt', src: '/imgs/files/txt.svg' }, - { suffix: 'md', src: '/imgs/files/markdown.svg' }, - { suffix: '.', src: '/imgs/files/file.svg' } -]; - export 
// ---- projects/app/src/pages/api/admin/initv447.ts ----
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { authUser } from '@/service/utils/auth';
import { connectToDatabase } from '@/service/mongo';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
import { DatasetSpecialIdEnum } from '@fastgpt/core/dataset/constant';
import mongoose, { Types } from 'mongoose';
import { delay } from '@/utils/tools';

/**
 * One-off V4.4.7 migration endpoint (root only).
 *
 * Steps, in order:
 *  1. widen the `source` / `file_id` columns and create the `file_id` index;
 *  2. collect every distinct non-empty `file_id` and find those that have no
 *     matching document in the `dataset.files` collection;
 *  3. rewrite those orphaned ids to the `manual` special id;
 *  4. rewrite rows whose `file_id` is NULL or empty to `manual`.
 *
 * The response is sent only after every update has completed; it reports the
 * number of rows that had an empty `file_id`. Any failure yields a 500.
 *
 * Request body: `{ limit?: number }` — batch size for step 2 and number of
 * concurrent workers for step 4. Non-numeric or non-positive values fall back
 * to 100.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  const initFileIds: string[] = [];
  try {
    // A zero/negative/non-numeric limit would stall or corrupt the `i += limit` loops below.
    const requestedLimit = Number(req.body?.limit);
    const limit = Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : 100;

    await connectToDatabase();
    await authUser({ req, authRoot: true });

    console.log('add index');
    await PgClient.query(
      `
      ALTER TABLE modeldata
      ALTER COLUMN source TYPE VARCHAR(256),
      ALTER COLUMN file_id TYPE VARCHAR(256);
      CREATE INDEX IF NOT EXISTS modelData_fileId_index ON modeldata (file_id);
    `
    );
    console.log('index success');
    console.log('count rows');
    // Fetch the distinct, non-empty file ids.
    const { rows } = await PgClient.query(`SELECT DISTINCT file_id
      FROM ${PgDatasetTableName} WHERE file_id IS NOT NULL AND file_id != '';
    `);
    console.log('count rows success', rows.length);
    console.log('start filter');
    for (let i = 0; i < rows.length; i += limit) {
      await init(rows.slice(i, i + limit), initFileIds);
      console.log(i);
    }
    console.log('filter success');
    console.log('start update');

    // NOTE(review): ids are interpolated into the SQL text; they originate from
    // the table itself, but a parameterised query would be safer if PgClient supports it.
    for (let i = 0; i < initFileIds.length; i++) {
      await PgClient.query(`UPDATE ${PgDatasetTableName}
        SET file_id = '${DatasetSpecialIdEnum.manual}'
        WHERE file_id = '${initFileIds[i]}'`);
      console.log('update: ', initFileIds[i]);
    }

    const { rows: emptyIds } = await PgClient.query(
      `SELECT id FROM ${PgDatasetTableName} WHERE file_id IS NULL OR file_id=''`
    );
    console.log(emptyIds.length);

    await delay(5000);

    // Worker `offset` updates rows offset, offset + limit, offset + 2 * limit, ...
    // so `limit` workers together cover every row exactly once.
    async function updateStride(offset: number) {
      for (let i = offset; i < emptyIds.length; i += limit) {
        await PgClient.query(`UPDATE ${PgDatasetTableName}
          SET file_id = '${DatasetSpecialIdEnum.manual}'
          WHERE id = '${emptyIds[i].id}'`);
        console.log('update: ', i, emptyIds[i].id);
      }
    }
    // Wait for every worker: a failure must reach the catch below, and success
    // must not be reported while updates are still running.
    await Promise.all(Array.from({ length: limit }, (_, offset) => updateStride(offset)));

    // await PgClient.query(
    //   `UPDATE ${PgDatasetTableName}
    //   SET file_id = '${DatasetSpecialIdEnum.manual}'
    //   WHERE file_id IS NULL OR file_id = ''`
    // );

    console.log('update success');

    jsonRes(res, {
      data: {
        empty: emptyIds.length
      }
    });
  } catch (error) {
    jsonRes(res, {
      code: 500,
      error
    });
  }
}

/**
 * Check one batch of rows against the `dataset.files` collection and append
 * every `file_id` that has no matching file document to `initFileIds`.
 *
 * Lookups run concurrently via `Promise.allSettled`, so a lookup that rejects
 * is ignored and its id is left untouched. Presumably `new Types.ObjectId`
 * throws for ids that are not valid ObjectIds (e.g. `manual`, `mark`, links),
 * which keeps such ids out of `initFileIds` — TODO confirm.
 *
 * @param rows - Batch of rows carrying a `file_id`.
 * @param initFileIds - Output list, mutated in place.
 */
async function init(rows: { file_id: string }[], initFileIds: string[]) {
  const collection = mongoose.connection.db.collection(`dataset.files`);

  const lookups = await Promise.allSettled(
    rows.map(async (item) => {
      // Is there a stored file for this id?
      const file = await collection.findOne({
        _id: new Types.ObjectId(item.file_id)
      });

      if (file) return '';
      // No file: queue the id for conversion to `manual`.
      initFileIds.push(item.file_id);

      return item.file_id;
    })
  );
  // Number of ids in this batch that were queued for conversion.
  console.log(lookups.filter((item) => item.status === 'fulfilled' && item.value).length);
}
a/projects/app/src/pages/api/core/dataset/data/exportAll.ts +++ b/projects/app/src/pages/api/core/dataset/data/exportAll.ts @@ -91,6 +91,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< if (res.closed) { return stream.destroy(); } + q = q.replace(/"/g, '""'); + a = a.replace(/"/g, '""'); + source = source?.replace(/"/g, '""'); + write(`\n"${q}","${a || ''}","${source || ''}"`); }); // finish diff --git a/projects/app/src/pages/api/core/dataset/data/getDataList.ts b/projects/app/src/pages/api/core/dataset/data/getDataList.ts index 8314a4717..a09e2bd7d 100644 --- a/projects/app/src/pages/api/core/dataset/data/getDataList.ts +++ b/projects/app/src/pages/api/core/dataset/data/getDataList.ts @@ -4,7 +4,6 @@ import { connectToDatabase } from '@/service/mongo'; import { authUser } from '@/service/utils/auth'; import { PgClient } from '@/service/pg'; import { PgDatasetTableName } from '@/constants/plugin'; -import { OtherFileId } from '@/constants/dataset'; import type { PgDataItemType } from '@/types/core/dataset/data'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { @@ -36,15 +35,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< ['user_id', userId], 'AND', ['kb_id', kbId], - ...(fileId - ? fileId === OtherFileId - ? ["AND (file_id IS NULL OR file_id = '')"] - : ['AND', ['file_id', fileId]] - : []), + 'AND', + ['file_id', fileId], ...(searchText ? 
[ 'AND', - `(q LIKE '%${searchText}%' OR a LIKE '%${searchText}%' OR source LIKE '%${searchText}%')` + `(q ILIKE '%${searchText}%' OR a ILIKE '%${searchText}%' OR source ILIKE '%${searchText}%')` ] : []) ]; diff --git a/projects/app/src/pages/api/core/dataset/data/insertData.ts b/projects/app/src/pages/api/core/dataset/data/insertData.ts index 23e351045..dace05c99 100644 --- a/projects/app/src/pages/api/core/dataset/data/insertData.ts +++ b/projects/app/src/pages/api/core/dataset/data/insertData.ts @@ -1,3 +1,7 @@ +/* + insert one data to dataset (immediately insert) + manual input or mark data +*/ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@/service/response'; import { connectToDatabase } from '@/service/mongo'; @@ -11,7 +15,6 @@ import { DatasetDataItemType } from '@/types/core/dataset/data'; import { countPromptTokens } from '@/utils/common/tiktoken'; export type Props = { - billId?: string; kbId: string; data: DatasetDataItemType; }; @@ -40,7 +43,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex export async function getVectorAndInsertDataset( props: Props & { userId: string } ): Promise { - const { kbId, data, userId, billId } = props; + const { kbId, data, userId } = props; if (!kbId || !data?.q) { return Promise.reject('缺少参数'); } @@ -61,7 +64,7 @@ export async function getVectorAndInsertDataset( const { rows: existsRows } = await PgClient.query(` SELECT COUNT(*) > 0 AS exists FROM ${PgDatasetTableName} - WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND user_id='${userId}' AND kb_id='${kbId}' + WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND user_id='${userId}' AND file_id='${data.file_id}' AND kb_id='${kbId}' `); const exists = existsRows[0]?.exists || false; @@ -72,8 +75,7 @@ export async function getVectorAndInsertDataset( const { vectors } = await getVector({ model: kb.vectorModel, input: [q], - userId, - billId + userId }); const response = await insertData2Dataset({ 
diff --git a/projects/app/src/pages/api/core/dataset/file/delById.ts b/projects/app/src/pages/api/core/dataset/file/delById.ts index cb95c5812..dbc82a72d 100644 --- a/projects/app/src/pages/api/core/dataset/file/delById.ts +++ b/projects/app/src/pages/api/core/dataset/file/delById.ts @@ -6,7 +6,7 @@ import { GridFSStorage } from '@/service/lib/gridfs'; import { PgClient } from '@/service/pg'; import { PgDatasetTableName } from '@/constants/plugin'; import { Types } from 'mongoose'; -import { OtherFileId } from '@/constants/dataset'; +import { isSpecialFileId } from '@fastgpt/core/dataset/utils'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -22,14 +22,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< const { userId } = await authUser({ req, authToken: true }); // other data. Delete only vector data - if (fileId === OtherFileId) { + if (isSpecialFileId(fileId)) { await PgClient.delete(PgDatasetTableName, { - where: [ - ['user_id', userId], - 'AND', - ['kb_id', kbId], - "AND (file_id IS NULL OR file_id = '')" - ] + where: [['user_id', userId], 'AND', ['kb_id', kbId], 'AND', ['file_id', fileId]] }); } else { // auth file @@ -48,7 +43,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< file_id: fileId }); - // delete file + // delete file await bucket.delete(new Types.ObjectId(fileId)); } diff --git a/projects/app/src/pages/api/core/dataset/file/detail.ts b/projects/app/src/pages/api/core/dataset/file/detail.ts index dc3567040..115800b4e 100644 --- a/projects/app/src/pages/api/core/dataset/file/detail.ts +++ b/projects/app/src/pages/api/core/dataset/file/detail.ts @@ -3,8 +3,12 @@ import { jsonRes } from '@/service/response'; import { connectToDatabase } from '@/service/mongo'; import { authUser } from '@/service/utils/auth'; import { GridFSStorage } from '@/service/lib/gridfs'; -import { OtherFileId } from '@/constants/dataset'; +import { 
datasetSpecialIdMap } from '@fastgpt/core/dataset/constant'; +import { datasetSpecialIds } from '@fastgpt/core/dataset/constant'; import type { GSFileInfoType } from '@/types/common/file'; +import { strIsLink } from '@fastgpt/common/tools/str'; +import { PgClient } from '@/service/pg'; +import { PgDatasetTableName } from '@/constants/plugin'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -14,12 +18,32 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< // 凭证校验 const { userId } = await authUser({ req, authToken: true }); - if (fileId === OtherFileId) { + // manual, mark + if (datasetSpecialIds.includes(fileId)) { return jsonRes(res, { data: { - id: OtherFileId, + id: fileId, size: 0, - filename: 'kb.Other Data', + // @ts-ignore + filename: datasetSpecialIdMap[fileId]?.name || fileId, + uploadDate: new Date(), + encoding: '', + contentType: '' + } + }); + } + // link file + if (strIsLink(fileId)) { + const { rows } = await PgClient.select(PgDatasetTableName, { + where: [['user_id', userId], 'AND', ['file_id', fileId]], + limit: 1, + fields: ['source'] + }); + return jsonRes(res, { + data: { + id: fileId, + size: 0, + filename: rows[0]?.source || fileId, uploadDate: new Date(), encoding: '', contentType: '' diff --git a/projects/app/src/pages/api/core/dataset/file/list.ts b/projects/app/src/pages/api/core/dataset/file/list.ts index 536e244a7..49037fa61 100644 --- a/projects/app/src/pages/api/core/dataset/file/list.ts +++ b/projects/app/src/pages/api/core/dataset/file/list.ts @@ -5,7 +5,14 @@ import { authUser } from '@/service/utils/auth'; import { GridFSStorage } from '@/service/lib/gridfs'; import { PgClient } from '@/service/pg'; import { PgDatasetTableName } from '@/constants/plugin'; -import { FileStatusEnum, OtherFileId } from '@/constants/dataset'; +import { FileStatusEnum } from '@/constants/dataset'; +import { strIsLink } from '@fastgpt/common/tools/str'; +import { + 
DatasetSpecialIdEnum, + datasetSpecialIdMap, + datasetSpecialIds +} from '@fastgpt/core/dataset/constant'; +import { Types } from 'mongoose'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -22,57 +29,106 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< // 凭证校验 const { userId } = await authUser({ req, authToken: true }); + // select and count same file_id data, exclude special id + const pgWhere = `user_id = '${userId}' AND kb_id = '${kbId}' ${datasetSpecialIds + .map((item) => `AND file_id!='${item}'`) + .join(' ')} + ${searchText ? `AND source ILIKE '%${searchText}%'` : ''}`; + + const [{ rows }, { rowCount: total }] = await Promise.all([ + PgClient.query(`SELECT file_id, COUNT(*) AS count + FROM ${PgDatasetTableName} + where ${pgWhere} + GROUP BY file_id + ORDER BY file_id DESC + LIMIT ${pageSize} OFFSET ${(pageNum - 1) * pageSize}; + `), + PgClient.query(`SELECT DISTINCT file_id + FROM ${PgDatasetTableName} + where ${pgWhere} + `) + ]); + // find files const gridFs = new GridFSStorage('dataset', userId); const collection = gridFs.Collection(); - const mongoWhere = { - ['metadata.kbId']: kbId, - ['metadata.userId']: userId, - ['metadata.datasetUsed']: true, - ...(searchText && { filename: { $regex: searchText } }) - }; - const [files, total] = await Promise.all([ - collection - .find(mongoWhere, { - projection: { - _id: 1, - filename: 1, - uploadDate: 1, - length: 1 - } - }) - .skip((pageNum - 1) * pageSize) - .limit(pageSize) - .sort({ uploadDate: -1 }) - .toArray(), - collection.countDocuments(mongoWhere) - ]); - - async function GetOtherData() { - return { - id: OtherFileId, - size: 0, - filename: 'kb.Other Data', - uploadTime: new Date(), - status: (await TrainingData.findOne({ userId, kbId, file_id: '' })) - ? 
FileStatusEnum.embedding - : FileStatusEnum.ready, - chunkLength: await PgClient.count(PgDatasetTableName, { - fields: ['id'], - where: [ - ['user_id', userId], - 'AND', - ['kb_id', kbId], - "AND (file_id IS NULL OR file_id = '')" - ] - }) - }; + async function getSpecialData() { + if (pageNum !== 1) return []; + return [ + { + id: DatasetSpecialIdEnum.manual, + size: 0, + filename: datasetSpecialIdMap[DatasetSpecialIdEnum.manual].name, + uploadTime: new Date(), + status: FileStatusEnum.ready, + chunkLength: await PgClient.count(PgDatasetTableName, { + fields: ['id'], + where: [ + ['user_id', userId], + 'AND', + ['file_id', DatasetSpecialIdEnum.manual], + 'AND', + ['kb_id', kbId] + ] + }) + }, + { + id: DatasetSpecialIdEnum.mark, + size: 0, + filename: datasetSpecialIdMap[DatasetSpecialIdEnum.mark].name, + uploadTime: new Date(), + status: FileStatusEnum.ready, + chunkLength: await PgClient.count(PgDatasetTableName, { + fields: ['id'], + where: [ + ['user_id', userId], + 'AND', + ['file_id', DatasetSpecialIdEnum.mark], + 'AND', + ['kb_id', kbId] + ] + }) + } + ]; } const data = await Promise.all([ - GetOtherData(), - ...files.map(async (file) => { + getSpecialData(), + ...rows.map(async (row) => { + // link data + if (strIsLink(row.file_id)) { + const { rows } = await PgClient.select(PgDatasetTableName, { + where: [['user_id', userId], 'AND', ['file_id', row.file_id]], + limit: 1, + fields: ['source'] + }); + return { + id: row.file_id, + size: 0, + filename: rows[0]?.source || row.file_id, + uploadTime: new Date(), + status: FileStatusEnum.ready, + chunkLength: row.count + }; + } + // file data + const file = await collection.findOne( + { + _id: new Types.ObjectId(row.file_id), + ['metadata.userId']: userId, + ['metadata.kbId']: kbId + }, + { + projection: { + _id: 1, + filename: 1, + uploadDate: 1, + length: 1 + } + } + ); + if (!file) return null; return { id: String(file._id), size: file.length, @@ -81,16 +137,7 @@ export default async function handler(req: 
NextApiRequest, res: NextApiResponse< status: (await TrainingData.findOne({ userId, kbId, file_id: file._id })) ? FileStatusEnum.embedding : FileStatusEnum.ready, - chunkLength: await PgClient.count(PgDatasetTableName, { - fields: ['id'], - where: [ - ['user_id', userId], - 'AND', - ['kb_id', kbId], - 'AND', - ['file_id', String(file._id)] - ] - }) + chunkLength: row.count }; }) ]); @@ -99,7 +146,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< data: { pageNum, pageSize, - data: data.flat(), + data: data.flat().filter((item) => item), total } }); diff --git a/projects/app/src/pages/api/core/dataset/file/update.ts b/projects/app/src/pages/api/core/dataset/file/update.ts index df95a327d..eeb2947ef 100644 --- a/projects/app/src/pages/api/core/dataset/file/update.ts +++ b/projects/app/src/pages/api/core/dataset/file/update.ts @@ -8,6 +8,7 @@ import { Types } from 'mongoose'; import { PgClient } from '@/service/pg'; import { PgDatasetTableName } from '@/constants/plugin'; import { addLog } from '@/service/utils/tools'; +import { strIsLink } from '@fastgpt/common/tools/str'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -19,20 +20,22 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< const gridFs = new GridFSStorage('dataset', userId); const collection = gridFs.Collection(); - await collection.findOneAndUpdate( - { - _id: new Types.ObjectId(id) - }, - { - $set: { - ...(name && { filename: name }), - ...(datasetUsed && { ['metadata.datasetUsed']: datasetUsed }) + if (id.length === 24 && !strIsLink(id)) { + await collection.findOneAndUpdate( + { + _id: new Types.ObjectId(id) + }, + { + $set: { + ...(name && { filename: name }), + ...(datasetUsed && { ['metadata.datasetUsed']: datasetUsed }) + } } - } - ); + ); + } // data source - updateDatasetSource({ + await updateDatasetSource({ fileId: id, userId, name diff --git 
a/projects/app/src/pages/kb/detail/components/DataCard.tsx b/projects/app/src/pages/kb/detail/components/DataCard.tsx index f19f0c55c..79195638d 100644 --- a/projects/app/src/pages/kb/detail/components/DataCard.tsx +++ b/projects/app/src/pages/kb/detail/components/DataCard.tsx @@ -1,31 +1,28 @@ import React, { useCallback, useState, useRef, useMemo } from 'react'; -import { Box, Card, IconButton, Flex, Grid, Image } from '@chakra-ui/react'; +import { Box, Card, IconButton, Flex, Grid, Image, Button } from '@chakra-ui/react'; import type { PgDataItemType } from '@/types/core/dataset/data'; import { usePagination } from '@/hooks/usePagination'; -import { - getDatasetDataList, - delOneDatasetDataById, - getTrainingData -} from '@/api/core/dataset/data'; +import { getDatasetDataList, delOneDatasetDataById } from '@/api/core/dataset/data'; import { getFileInfoById } from '@/api/core/dataset/file'; import { DeleteIcon, RepeatIcon } from '@chakra-ui/icons'; import { useQuery } from '@tanstack/react-query'; import { useToast } from '@/hooks/useToast'; -import InputModal, { FormData as InputDataType } from './InputDataModal'; +import InputModal, { FormData as InputDataType, RawFileText } from './InputDataModal'; import { debounce } from 'lodash'; import { getErrText } from '@/utils/tools'; import { useConfirm } from '@/hooks/useConfirm'; import { useTranslation } from 'react-i18next'; import { useRouter } from 'next/router'; import MyIcon from '@/components/Icon'; -import MyTooltip from '@/components/MyTooltip'; import MyInput from '@/components/MyInput'; -import { fileImgs } from '@/constants/common'; +import { useLoading } from '@/hooks/useLoading'; +import { getFileIcon, getSpecialFileIcon } from '@fastgpt/common/tools/file'; const DataCard = ({ kbId }: { kbId: string }) => { const BoxRef = useRef(null); const lastSearch = useRef(''); const router = useRouter(); + const { Loading, setIsLoading } = useLoading({ defaultLoading: true }); const { fileId = '' } = router.query 
as { fileId: string }; const { t } = useTranslation(); const [searchText, setSearchText] = useState(''); @@ -37,7 +34,6 @@ const DataCard = ({ kbId }: { kbId: string }) => { const { data: kbDataList, - isLoading, Pagination, total, getData, @@ -52,6 +48,7 @@ const DataCard = ({ kbId }: { kbId: string }) => { fileId }, onChange() { + setIsLoading(false); if (BoxRef.current) { BoxRef.current.scrollTop = 0; } @@ -72,9 +69,8 @@ const DataCard = ({ kbId }: { kbId: string }) => { // get file info const { data: fileInfo } = useQuery(['getFileInfo', fileId], () => getFileInfoById(fileId)); const fileIcon = useMemo( - () => - fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(fileInfo?.filename || ''))?.src, - [fileInfo?.filename] + () => getSpecialFileIcon(fileInfo?.id) || getFileIcon(fileInfo?.filename), + [fileInfo?.filename, fileInfo?.id] ); return ( @@ -82,10 +78,9 @@ const DataCard = ({ kbId }: { kbId: string }) => { } + icon={} bg={'white'} boxShadow={'1px 1px 9px rgba(0,0,0,0.15)'} - h={'28px'} size={'sm'} borderRadius={'50%'} aria-label={''} @@ -98,30 +93,34 @@ const DataCard = ({ kbId }: { kbId: string }) => { }) } /> - + {''} - {t(fileInfo?.filename || 'Filename')} + - - } - size={['sm', 'md']} - aria-label={'refresh'} - variant={'base'} - isLoading={isLoading} - onClick={() => { - getData(pageNum); - getTrainingData({ kbId, init: true }); - }} - /> - + @@ -249,6 +248,7 @@ const DataCard = ({ kbId }: { kbId: string }) => { /> )} + ); }; diff --git a/projects/app/src/pages/kb/detail/components/FileCard.tsx b/projects/app/src/pages/kb/detail/components/FileCard.tsx index 9d343f8ce..9bf6b77e3 100644 --- a/projects/app/src/pages/kb/detail/components/FileCard.tsx +++ b/projects/app/src/pages/kb/detail/components/FileCard.tsx @@ -22,16 +22,17 @@ import { useTranslation } from 'react-i18next'; import MyIcon from '@/components/Icon'; import MyInput from '@/components/MyInput'; import dayjs from 'dayjs'; -import { fileImgs } from '@/constants/common'; import { 
useRequest } from '@/hooks/useRequest'; import { useLoading } from '@/hooks/useLoading'; -import { FileStatusEnum, OtherFileId } from '@/constants/dataset'; +import { FileStatusEnum } from '@/constants/dataset'; import { useRouter } from 'next/router'; import { usePagination } from '@/hooks/usePagination'; import type { DatasetFileItemType } from '@/types/core/dataset/file'; import { useGlobalStore } from '@/store/global'; import MyMenu from '@/components/MyMenu'; import { useEditTitle } from '@/hooks/useEditTitle'; +import { datasetSpecialIds } from '@fastgpt/core/dataset/constant'; +import { getFileIcon, getSpecialFileIcon } from '@fastgpt/common/tools/file'; const FileCard = ({ kbId }: { kbId: string }) => { const BoxRef = useRef(null); @@ -79,10 +80,14 @@ const FileCard = ({ kbId }: { kbId: string }) => { // add file icon const formatFiles = useMemo( () => - files.map((file) => ({ - ...file, - icon: fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(file.filename))?.src - })), + files.map((file) => { + const icon = getSpecialFileIcon(file.id) || getFileIcon(file.filename); + + return { + ...file, + icon + }; + }), [files] ); @@ -114,8 +119,8 @@ const FileCard = ({ kbId }: { kbId: string }) => { onSettled() { setLoading(false); }, - successToast: t('common.Delete Success'), - errorToast: t('common.Delete Failed') + successToast: t('common.Rename Success'), + errorToast: t('common.Rename Failed') }); const { onOpenModal, EditModal: EditTitleModal } = useEditTitle({ @@ -135,11 +140,15 @@ const FileCard = ({ kbId }: { kbId: string }) => { // training data const { data: { qaListLen = 0, vectorListLen = 0 } = {}, refetch: refetchTrainingData } = - useQuery(['getModelSplitDataList', kbId], () => getTrainingData({ kbId, init: false }), { - onError(err) { - console.log(err); + useQuery( + ['getModelSplitDataList', kbId], + () => getTrainingData({ kbId, init: Math.random() > 0.7 }), + { + onError(err) { + console.log(err); + } } - }); + ); useQuery( 
['refetchTrainingData', kbId], @@ -279,7 +288,7 @@ const FileCard = ({ kbId }: { kbId: string }) => { } menuList={[ - ...(file.id !== OtherFileId + ...(!datasetSpecialIds.includes(file.id) ? [ { child: ( diff --git a/projects/app/src/pages/kb/detail/components/Import/CreateFileModal.tsx b/projects/app/src/pages/kb/detail/components/Import/CreateFileModal.tsx index e9b6e8988..545b519af 100644 --- a/projects/app/src/pages/kb/detail/components/Import/CreateFileModal.tsx +++ b/projects/app/src/pages/kb/detail/components/Import/CreateFileModal.tsx @@ -3,13 +3,14 @@ import { useTranslation } from 'next-i18next'; import MyModal from '@/components/MyModal'; import { Box, Input, Textarea, ModalBody, ModalFooter, Button } from '@chakra-ui/react'; import { useForm } from 'react-hook-form'; +import { useRequest } from '@/hooks/useRequest'; const CreateFileModal = ({ onClose, onSuccess }: { onClose: () => void; - onSuccess: (e: { filename: string; content: string }) => void; + onSuccess: (e: { filename: string; content: string }) => Promise; }) => { const { t } = useTranslation(); const { register, handleSubmit } = useForm({ @@ -19,6 +20,13 @@ const CreateFileModal = ({ } }); + const { mutate, isLoading } = useRequest({ + mutationFn: () => handleSubmit(onSuccess)(), + onSuccess: () => { + onClose(); + } + }); + return ( @@ -47,12 +55,7 @@ const CreateFileModal = ({ - diff --git a/projects/app/src/pages/kb/detail/components/Import/FileSelect.tsx b/projects/app/src/pages/kb/detail/components/Import/FileSelect.tsx index 7cb03b830..bb2b5a641 100644 --- a/projects/app/src/pages/kb/detail/components/Import/FileSelect.tsx +++ b/projects/app/src/pages/kb/detail/components/Import/FileSelect.tsx @@ -12,7 +12,6 @@ import { readDocContent } from '@/utils/web/file'; import { Box, Flex, useDisclosure, type BoxProps } from '@chakra-ui/react'; -import { fileImgs } from '@/constants/common'; import { DragEvent, useCallback, useState } from 'react'; import { useTranslation } from 'next-i18next'; 
import { customAlphabet } from 'nanoid'; @@ -22,12 +21,13 @@ import { FetchResultItem } from '@/types/plugin'; import type { DatasetDataItemType } from '@/types/core/dataset/data'; import { getErrText } from '@/utils/tools'; import { useDatasetStore } from '@/store/dataset'; +import { getFileIcon } from '@fastgpt/common/tools/file'; const UrlFetchModal = dynamic(() => import('./UrlFetchModal')); const CreateFileModal = dynamic(() => import('./CreateFileModal')); const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12); -const csvTemplate = `index,content,source\n"被索引的内容","对应的答案。CSV 中请注意内容不能包含双引号,双引号是列分割符号","来源,可选。"\n"什么是 laf","laf 是一个云函数开发平台……",""\n"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……",""`; +const csvTemplate = `index,content\n"被索引的内容","对应的答案。CSV 中请注意内容不能包含双引号,双引号是列分割符号"\n"什么是 laf","laf 是一个云函数开发平台……",""\n"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……"`; export type FileItemType = { id: string; @@ -63,7 +63,7 @@ const FileSelect = ({ const { toast } = useToast(); - const { File, onOpen } = useSelectFile({ + const { File: FileSelector, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true }); @@ -92,11 +92,9 @@ const FileSelect = ({ const extension = file?.name?.split('.')?.pop()?.toLowerCase(); /* text file */ - const icon = fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(file.name))?.src; + const icon = getFileIcon(file?.name); - if (!icon) { - continue; - } + if (!icon) continue; // parse and upload files let [text, filesId] = await Promise.all([ @@ -165,7 +163,7 @@ const FileSelect = ({ .map((item) => ({ q: item[0] || '', a: item[1] || '', - source: item[2] || file.name || '', + source: file.name || '', file_id: filesId[0] })) }; @@ -201,7 +199,8 @@ const FileSelect = ({ chunks: splitRes.chunks.map((chunk) => ({ q: chunk, a: '', - source: url + source: url, + file_id: url })) }; }); @@ -210,15 +209,25 @@ const FileSelect = ({ [chunkLen, onPushFiles] ); const onCreateFile = useCallback( - ({ filename, 
content }: { filename: string; content: string }) => { + async ({ filename, content }: { filename: string; content: string }) => { content = simpleText(content); + + // create virtual txt file + const txtBlob = new Blob([content], { type: 'text/plain' }); + const txtFile = new File([txtBlob], `${filename}.txt`, { + type: txtBlob.type, + lastModified: new Date().getTime() + }); + const fileIds = await uploadFiles([txtFile], { kbId: kbDetail._id }); + const splitRes = splitText2Chunks({ text: content, maxLen: chunkLen }); + onPushFiles([ { - id: nanoid(), + id: fileIds[0], filename, icon: '/imgs/files/txt.svg', text: content, @@ -226,12 +235,13 @@ const FileSelect = ({ chunks: splitRes.chunks.map((chunk) => ({ q: chunk, a: '', - source: filename + source: filename, + file_id: fileIds[0] })) } ]); }, - [chunkLen, onPushFiles] + [chunkLen, kbDetail._id, onPushFiles] ); const handleDragEnter = (e: DragEvent) => { @@ -383,7 +393,7 @@ const FileSelect = ({ {selectingText !== undefined && ( )} - + {isOpenUrlFetch && } {isOpenCreateFile && } diff --git a/projects/app/src/pages/kb/detail/components/Import/Manual.tsx b/projects/app/src/pages/kb/detail/components/Import/Manual.tsx index f75d7ef47..b4d75dd70 100644 --- a/projects/app/src/pages/kb/detail/components/Import/Manual.tsx +++ b/projects/app/src/pages/kb/detail/components/Import/Manual.tsx @@ -4,11 +4,11 @@ import { useForm } from 'react-hook-form'; import { useToast } from '@/hooks/useToast'; import { useRequest } from '@/hooks/useRequest'; import { getErrText } from '@/utils/tools'; -import { postChunks2Dataset } from '@/api/core/dataset/data'; -import { TrainingModeEnum } from '@/constants/plugin'; +import { postData2Dataset } from '@/api/core/dataset/data'; import MyTooltip from '@/components/MyTooltip'; import { QuestionOutlineIcon } from '@chakra-ui/icons'; import { useDatasetStore } from '@/store/dataset'; +import { DatasetSpecialIdEnum, datasetSpecialIdMap } from '@fastgpt/core/dataset/constant'; type 
ManualFormType = { q: string; a: string }; @@ -33,32 +33,24 @@ const ManualImport = ({ kbId }: { kbId: string }) => { } try { - const data = { - a: e.a, - q: e.q, - source: '手动录入' - }; - const { insertLen } = await postChunks2Dataset({ + await postData2Dataset({ kbId, - mode: TrainingModeEnum.index, - data: [data] + data: { + a: e.a, + q: e.q, + source: datasetSpecialIdMap[DatasetSpecialIdEnum.manual]?.sourceName, + file_id: DatasetSpecialIdEnum.manual + } }); - if (insertLen === 0) { - toast({ - title: '已存在完全一致的数据', - status: 'warning' - }); - } else { - toast({ - title: '导入数据成功,需要一段时间训练', - status: 'success' - }); - reset({ - a: '', - q: '' - }); - } + toast({ + title: '导入数据成功,需要一段时间训练', + status: 'success' + }); + reset({ + a: '', + q: '' + }); } catch (err: any) { toast({ title: getErrText(err, '出现了点意外~'), diff --git a/projects/app/src/pages/kb/detail/components/InputDataModal.tsx b/projects/app/src/pages/kb/detail/components/InputDataModal.tsx index 289d8c37f..f02515b0a 100644 --- a/projects/app/src/pages/kb/detail/components/InputDataModal.tsx +++ b/projects/app/src/pages/kb/detail/components/InputDataModal.tsx @@ -1,4 +1,4 @@ -import React, { useState, useCallback } from 'react'; +import React, { useState, useCallback, useMemo } from 'react'; import { Box, Flex, Button, Textarea, IconButton, BoxProps } from '@chakra-ui/react'; import { useForm } from 'react-hook-form'; import { @@ -17,6 +17,9 @@ import { DatasetDataItemType } from '@/types/core/dataset/data'; import { useTranslation } from 'react-i18next'; import { useDatasetStore } from '@/store/dataset'; import { getFileAndOpen } from '@/utils/web/file'; +import { datasetSpecialIdMap, datasetSpecialIds } from '@fastgpt/core/dataset/constant'; +import { strIsLink } from '@fastgpt/common/tools/str'; +import { useGlobalStore } from '@/store/global'; export type FormData = { dataId?: string } & DatasetDataItemType; @@ -25,16 +28,13 @@ const InputDataModal = ({ onSuccess, onDelete, kbId, - defaultValues = { - 
a: '', - q: '' - } + defaultValues }: { onClose: () => void; onSuccess: (data: FormData) => void; onDelete?: () => void; kbId: string; - defaultValues?: FormData; + defaultValues: FormData; }) => { const { t } = useTranslation(); const [loading, setLoading] = useState(false); @@ -64,10 +64,10 @@ const InputDataModal = ({ try { const data = { + ...e, dataId: '', - a: e.a, - q: e.q, - source: '手动录入' + // @ts-ignore + source: e.source || datasetSpecialIdMap[e.file_id]?.sourceName }; data.dataId = await postData2Dataset({ kbId, @@ -79,6 +79,7 @@ const InputDataModal = ({ status: 'success' }); reset({ + ...e, a: '', q: '' }); @@ -103,9 +104,9 @@ const InputDataModal = ({ setLoading(true); try { const data = { + ...e, dataId: e.dataId, kbId, - a: e.a, q: e.q === defaultValues.q ? '' : e.q }; await putDatasetDataById(data); @@ -259,31 +260,40 @@ interface RawFileTextProps extends BoxProps { export function RawFileText({ fileId, filename = '', ...props }: RawFileTextProps) { const { t } = useTranslation(); const { toast } = useToast(); + const { setLoading } = useGlobalStore(); + + const hasFile = useMemo(() => fileId && !datasetSpecialIds.includes(fileId), [fileId]); + return ( - + { + if (strIsLink(fileId)) { + return window.open(fileId, '_blank'); + } + setLoading(true); try { - await getFileAndOpen(fileId); + await getFileAndOpen(fileId as string); } catch (error) { toast({ title: getErrText(error, '获取文件地址失败'), status: 'error' }); } + setLoading(false); } } : {})} {...props} > - {filename} + {t(filename)} ); diff --git a/projects/app/src/service/pg.ts b/projects/app/src/service/pg.ts index e5c2478e4..fe40fd35f 100644 --- a/projects/app/src/service/pg.ts +++ b/projects/app/src/service/pg.ts @@ -179,8 +179,8 @@ export const insertData2Dataset = ({ values: data.map((item) => [ { key: 'user_id', value: userId }, { key: 'kb_id', value: kbId }, - { key: 'source', value: item.source?.slice(0, 60)?.trim() || '' }, - { key: 'file_id', value: item.file_id || '' }, + { key: 
'source', value: item.source?.slice(0, 200)?.trim() || '' }, + { key: 'file_id', value: item.file_id?.slice(0, 200)?.trim() || '' }, { key: 'q', value: item.q.replace(/'/g, '"') }, { key: 'a', value: item.a.replace(/'/g, '"') }, { key: 'vector', value: `[${item.vector}]` } @@ -198,13 +198,14 @@ export async function initPg() { vector VECTOR(1536) NOT NULL, user_id VARCHAR(50) NOT NULL, kb_id VARCHAR(50), - source VARCHAR(100), - file_id VARCHAR(100), + source VARCHAR(256), + file_id VARCHAR(256), q TEXT NOT NULL, a TEXT ); CREATE INDEX IF NOT EXISTS modelData_userId_index ON ${PgDatasetTableName} USING HASH (user_id); - CREATE INDEX IF NOT EXISTS modelData_kbId_index ON ${PgDatasetTableName} USING HASH (kb_id); + CREATE INDEX IF NOT EXISTS modelData_kb_id_index ON ${PgDatasetTableName} (kb_id); + CREATE INDEX IF NOT EXISTS modelData_fileId_index ON ${PgDatasetTableName} (file_id); CREATE INDEX IF NOT EXISTS idx_model_data_md5_q_a_user_id_kb_id ON ${PgDatasetTableName} (md5(q), md5(a), user_id, kb_id); `); console.log('init pg successful'); diff --git a/projects/app/src/types/core/dataset/data.d.ts b/projects/app/src/types/core/dataset/data.d.ts index e8a9e2f6c..a487330c0 100644 --- a/projects/app/src/types/core/dataset/data.d.ts +++ b/projects/app/src/types/core/dataset/data.d.ts @@ -2,9 +2,9 @@ export type DatasetDataItemType = { q: string; // 提问词 a: string; // 原文 source?: string; - file_id?: string; + file_id: string; }; -export type PgDataItemType = DatasetItemType & { +export type PgDataItemType = DatasetDataItemType & { id: string; }; diff --git a/shdemo.sh b/shdemo.sh new file mode 100644 index 000000000..dfe577eef --- /dev/null +++ b/shdemo.sh @@ -0,0 +1,4 @@ +# Build image +docker build -t registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.4.7 --build-arg name=app . + +