diff --git a/.vscode/settings.json b/.vscode/settings.json
index 141c85306..eaf1fd95f 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -2,7 +2,7 @@
"editor.formatOnSave": true,
"editor.mouseWheelZoom": true,
"typescript.tsdk": "node_modules/typescript/lib",
- "editor.defaultFormatter": "esbenp.prettier-vscode",
+ "prettier.prettierPath": "./node_modules/prettier",
"i18n-ally.localesPaths": [
"projects/app/public/locales"
],
diff --git a/docSite/README.md b/docSite/README.md
index 21668d49f..a3b8ba755 100644
--- a/docSite/README.md
+++ b/docSite/README.md
@@ -3,7 +3,7 @@
## 本地运行
1. 安装 go 语言环境。
-2. 安装 hugo。 [二进制下载](https://github.com/gohugoio/hugo/releases/tag/v0.117.0)
+2. 安装 hugo。 [二进制下载](https://github.com/gohugoio/hugo/releases/tag/v0.117.0),注意需要安装 extended 版本。
3. cd docSite
4. hugo serve
5. 访问 http://localhost:1313
diff --git a/docSite/content/docs/development/configuration.md b/docSite/content/docs/development/configuration.md
index e238f4c38..325996e54 100644
--- a/docSite/content/docs/development/configuration.md
+++ b/docSite/content/docs/development/configuration.md
@@ -84,6 +84,14 @@ weight: 520
"maxToken": 16000,
"price": 0,
"prompt": ""
+ },
+ "QGModel": { // 生成下一步指引模型
+ "model": "gpt-3.5-turbo",
+ "name": "GPT35-4k",
+ "maxToken": 4000,
+ "price": 0,
+ "prompt": "",
+ "functionCall": false
}
}
```
diff --git a/docSite/content/docs/development/design/_index.md b/docSite/content/docs/development/design/_index.md
new file mode 100644
index 000000000..b0564c0d7
--- /dev/null
+++ b/docSite/content/docs/development/design/_index.md
@@ -0,0 +1,8 @@
+---
+weight: 540
+title: "设计方案"
+description: "FastGPT 部分设计方案"
+icon: public
+draft: false
+images: []
+---
\ No newline at end of file
diff --git a/docSite/content/docs/development/design/dataset.md b/docSite/content/docs/development/design/dataset.md
new file mode 100644
index 000000000..bf731aab6
--- /dev/null
+++ b/docSite/content/docs/development/design/dataset.md
@@ -0,0 +1,25 @@
+---
+weight: 541
+title: "数据集"
+description: "FastGPT 数据集中文件与数据的设计方案"
+icon: dataset
+draft: false
+images: []
+---
+
+## 文件与数据的关系
+
+在 FastGPT 中,文件会通过 MongoDB 的 FS 存储,而具体的数据会通过 PostgreSQL 存储,PG 中的数据会有一列 file_id,关联对应的文件。考虑到旧版本的兼容,以及手动输入、标注数据等,我们给 file_id 增加了一些特殊的值,如下:
+
+- manual: 手动输入
+- mark: 手动标注的数据
+
+注意,file_id 仅在插入数据时会写入,变更时无法修改。
+
+## 文件导入流程
+
+1. 上传文件到 MongoDB 的 FS 中,获取 file_id,此时文件标记为 `unused` 状态
+2. 浏览器解析文件,获取对应的文本和 chunk
+3. 给每个 chunk 打上 file_id
+4. 点击上传数据:将文件的状态改为 `used`,并将数据推送到 mongo `training` 表中等待训练
+5. 由训练线程从 mongo 中取数据,并在获取向量后插入到 pg。
\ No newline at end of file
diff --git a/docSite/content/docs/installation/upgrading/447.md b/docSite/content/docs/installation/upgrading/447.md
new file mode 100644
index 000000000..8160cbef0
--- /dev/null
+++ b/docSite/content/docs/installation/upgrading/447.md
@@ -0,0 +1,29 @@
+---
+title: 'V4.4.7'
+description: 'FastGPT V4.4.7 更新(需执行升级脚本)'
+icon: 'upgrade'
+draft: false
+toc: true
+weight: 840
+---
+
+## 执行初始化 API
+
+发起 1 个 HTTP 请求({{rootkey}} 替换成环境变量里的`rootkey`,{{host}}替换成自己域名)
+
+1. https://xxxxx/api/admin/initv447
+
+```bash
+curl --location --request POST 'https://{{host}}/api/admin/initv447' \
+--header 'rootkey: {{rootkey}}' \
+--header 'Content-Type: application/json'
+```
+
+初始化 pg 索引以及将 file_id 中空对象转成 manual 对象。如果数据多,可能需要较长时间,可以通过日志查看进度。
+
+## 功能介绍
+
+### FastGPT V4.4.7
+
+1. 优化了数据库文件 crud。
+2. 兼容链接读取,作为 source。
\ No newline at end of file
diff --git a/packages/common/tools/file.ts b/packages/common/tools/file.ts
new file mode 100644
index 000000000..fdcd357a4
--- /dev/null
+++ b/packages/common/tools/file.ts
@@ -0,0 +1,23 @@
+import { strIsLink } from './str';
+
+export const fileImgs = [
+ { suffix: 'pdf', src: '/imgs/files/pdf.svg' },
+ { suffix: 'csv', src: '/imgs/files/csv.svg' },
+ { suffix: '(doc|docs)', src: '/imgs/files/doc.svg' },
+ { suffix: 'txt', src: '/imgs/files/txt.svg' },
+ { suffix: 'md', src: '/imgs/files/markdown.svg' },
+ { suffix: '.', src: '/imgs/files/file.svg' }
+];
+
+export function getFileIcon(name = '') {
+ return fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(name))?.src;
+}
+export function getSpecialFileIcon(name = '') {
+ if (name === 'manual') {
+ return '/imgs/files/manual.svg';
+ } else if (name === 'mark') {
+ return '/imgs/files/mark.svg';
+ } else if (strIsLink(name)) {
+ return '/imgs/files/link.svg';
+ }
+}
diff --git a/packages/common/tools/str.ts b/packages/common/tools/str.ts
new file mode 100644
index 000000000..cf88afad3
--- /dev/null
+++ b/packages/common/tools/str.ts
@@ -0,0 +1,5 @@
+export function strIsLink(str?: string) {
+ if (!str) return false;
+ if (/^((http|https)?:\/\/|www\.|\/)[^\s/$.?#].[^\s]*$/i.test(str)) return true;
+ return false;
+}
diff --git a/packages/core/dataset/constant.ts b/packages/core/dataset/constant.ts
new file mode 100644
index 000000000..cdb62af3a
--- /dev/null
+++ b/packages/core/dataset/constant.ts
@@ -0,0 +1,15 @@
+export enum DatasetSpecialIdEnum {
+ manual = 'manual',
+ mark = 'mark'
+}
+export const datasetSpecialIdMap = {
+ [DatasetSpecialIdEnum.manual]: {
+ name: 'kb.Manual Data',
+ sourceName: 'kb.Manual Input'
+ },
+ [DatasetSpecialIdEnum.mark]: {
+ name: 'kb.Mark Data',
+ sourceName: 'kb.Manual Mark'
+ }
+};
+export const datasetSpecialIds: string[] = [DatasetSpecialIdEnum.manual, DatasetSpecialIdEnum.mark];
diff --git a/packages/core/dataset/utils.ts b/packages/core/dataset/utils.ts
new file mode 100644
index 000000000..d950d7d51
--- /dev/null
+++ b/packages/core/dataset/utils.ts
@@ -0,0 +1,8 @@
+import { datasetSpecialIds } from './constant';
+import { strIsLink } from '@fastgpt/common/tools/str';
+
+export function isSpecialFileId(id: string) {
+ if (datasetSpecialIds.includes(id)) return true;
+ if (strIsLink(id)) return true;
+ return false;
+}
diff --git a/packages/core/package.json b/packages/core/package.json
index 46673ad04..0329f7d68 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -3,7 +3,9 @@
"version": "1.0.0",
"dependencies": {
"openai": "^3.3.0",
- "tunnel": "^0.0.6"
+ "tunnel": "^0.0.6",
+ "@fastgpt/common": "workspace:*",
+ "@fastgpt/support": "workspace:*"
},
"devDependencies": {
"@types/tunnel": "^0.0.4"
diff --git a/packages/support/package.json b/packages/support/package.json
index 583205a45..d3835b359 100644
--- a/packages/support/package.json
+++ b/packages/support/package.json
@@ -1,4 +1,7 @@
{
"name": "@fastgpt/support",
- "version": "1.0.0"
+ "version": "1.0.0",
+ "dependencies": {
+ "@fastgpt/common": "workspace:*"
+ }
}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index ddcc1c3ca..6d4511557 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -31,6 +31,12 @@ importers:
packages/core:
dependencies:
+ '@fastgpt/common':
+ specifier: workspace:*
+ version: link:../common
+ '@fastgpt/support':
+ specifier: workspace:*
+ version: link:../support
openai:
specifier: ^3.3.0
version: registry.npmmirror.com/openai@3.3.0
@@ -42,7 +48,11 @@ importers:
specifier: ^0.0.4
version: registry.npmmirror.com/@types/tunnel@0.0.4
- packages/support: {}
+ packages/support:
+ dependencies:
+ '@fastgpt/common':
+ specifier: workspace:*
+ version: link:../common
projects/app:
dependencies:
diff --git a/projects/app/package.json b/projects/app/package.json
index bb9af3626..f3c8dd820 100644
--- a/projects/app/package.json
+++ b/projects/app/package.json
@@ -1,6 +1,6 @@
{
"name": "app",
- "version": "4.4.6",
+ "version": "4.4.7",
"private": false,
"scripts": {
"dev": "next dev",
diff --git a/projects/app/public/docs/chatProblem.md b/projects/app/public/docs/chatProblem.md
index 24036cca0..99256c060 100644
--- a/projects/app/public/docs/chatProblem.md
+++ b/projects/app/public/docs/chatProblem.md
@@ -9,6 +9,6 @@
- [计费规则](https://doc.fastgpt.run/docs/pricing/)
**其他问题**
-| 交流群 | 小助手 |
-| ----------------------- | -------------------- |
-|  |  |
+| 添加小助手进入交流群 |
+| ----------------------- |
+|  |
diff --git a/projects/app/public/imgs/files/link.svg b/projects/app/public/imgs/files/link.svg
new file mode 100644
index 000000000..68534ba48
--- /dev/null
+++ b/projects/app/public/imgs/files/link.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/projects/app/public/imgs/files/manual.svg b/projects/app/public/imgs/files/manual.svg
new file mode 100644
index 000000000..fa8b3106a
--- /dev/null
+++ b/projects/app/public/imgs/files/manual.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/projects/app/public/imgs/files/mark.svg b/projects/app/public/imgs/files/mark.svg
new file mode 100644
index 000000000..8c86e0099
--- /dev/null
+++ b/projects/app/public/imgs/files/mark.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/projects/app/public/locales/en/common.json b/projects/app/public/locales/en/common.json
index 026678635..eb4811079 100644
--- a/projects/app/public/locales/en/common.json
+++ b/projects/app/public/locales/en/common.json
@@ -127,6 +127,8 @@
"Output": "Output",
"Password inconsistency": "Password inconsistency",
"Rename": "Rename",
+ "Rename Failed": "Rename Failed",
+ "Rename Success": "Rename Success",
"Search": "Search",
"Status": "Status",
"Update Successful": "Update Successful",
@@ -214,10 +216,14 @@
"Filename": "Filename",
"Files": "{{total}} Files",
"Folder Name": "Input folder name",
+ "Insert Data": "Insert",
+ "Manual Data": "Manual Data",
+ "Manual Input": "Manual Input",
+ "Manual Mark": "Manual Mark",
+ "Mark Data": "Mark Data",
"Move Failed": "Move Failed",
"My Dataset": "My Dataset",
"No Folder": "No Folder",
- "Other Data": "Other Data",
"Select Dataset": "Select Dataset",
"Select Folder": "Enter folder",
"Upload Time": "Upload Time",
diff --git a/projects/app/public/locales/zh/common.json b/projects/app/public/locales/zh/common.json
index 102ac7a58..2c12ca555 100644
--- a/projects/app/public/locales/zh/common.json
+++ b/projects/app/public/locales/zh/common.json
@@ -127,6 +127,8 @@
"Output": "输出",
"Password inconsistency": "两次密码不一致",
"Rename": "重命名",
+ "Rename Failed": "重命名失败",
+ "Rename Success": "重命名成功",
"Search": "搜索",
"Status": "状态",
"Update Successful": "更新成功",
@@ -214,10 +216,14 @@
"Filename": "文件名",
"Files": "文件: {{total}}个",
"Folder Name": "输入文件夹名称",
+ "Insert Data": "插入",
+ "Manual Data": "手动录入",
+ "Manual Input": "手动录入",
+ "Manual Mark": "手动标注",
+ "Mark Data": "标注数据",
"Move Failed": "移动出现错误~",
"My Dataset": "我的知识库",
"No Folder": "没有子目录了~",
- "Other Data": "其他数据",
"Select Dataset": "选择该知识库",
"Select Folder": "进入文件夹",
"Upload Time": "上传时间",
diff --git a/projects/app/src/components/ChatBox/ResponseTags.tsx b/projects/app/src/components/ChatBox/ResponseTags.tsx
index 8f2f3194b..f06a172b6 100644
--- a/projects/app/src/components/ChatBox/ResponseTags.tsx
+++ b/projects/app/src/components/ChatBox/ResponseTags.tsx
@@ -36,7 +36,8 @@ const ResponseTags = ({ responseData = [] }: { responseData?: ChatHistoryItemRes
quoteList: responseData
.filter((item) => item.moduleType === FlowModuleTypeEnum.chatNode)
.map((item) => item.quoteList)
- .flat(),
+ .flat()
+ .filter((item) => item) as QuoteItemType[],
historyPreview: chatData?.historyPreview,
runningTime: +responseData.reduce((sum, item) => sum + (item.runningTime || 0), 0).toFixed(2)
};
diff --git a/projects/app/src/components/ChatBox/index.tsx b/projects/app/src/components/ChatBox/index.tsx
index b2e75e7d0..aa674f733 100644
--- a/projects/app/src/components/ChatBox/index.tsx
+++ b/projects/app/src/components/ChatBox/index.tsx
@@ -63,6 +63,7 @@ import styles from './index.module.scss';
import Script from 'next/script';
import { postQuestionGuide } from '@/api/core/ai/agent/api';
import { splitGuideModule } from './utils';
+import { DatasetSpecialIdEnum } from '@fastgpt/core/dataset/constant';
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 24);
@@ -511,6 +512,12 @@ const ChatBox = (
// add guide text listener
useEffect(() => {
+ const windowMessage = ({ data }: MessageEvent<{ type: 'sendPrompt'; text: string }>) => {
+ if (data?.type === 'sendPrompt' && data?.text) {
+ handleSubmit((item) => sendPrompt(item, data.text))();
+ }
+ };
+ window.addEventListener('message', windowMessage);
event.on('guideClick', ({ text }: { text: string }) => {
if (!text) return;
handleSubmit((data) => sendPrompt(data, text))();
@@ -518,6 +525,7 @@ const ChatBox = (
return () => {
event.off('guideClick');
+ window.removeEventListener('message', windowMessage);
};
}, [handleSubmit, sendPrompt]);
@@ -995,7 +1003,8 @@ const ChatBox = (
defaultValues={{
dataId: adminMarkData.dataId,
q: adminMarkData.q,
- a: adminMarkData.a
+ a: adminMarkData.a,
+ file_id: DatasetSpecialIdEnum.mark
}}
/>
)}
diff --git a/projects/app/src/constants/common.ts b/projects/app/src/constants/common.ts
index fd076d299..4fefa5268 100644
--- a/projects/app/src/constants/common.ts
+++ b/projects/app/src/constants/common.ts
@@ -3,15 +3,6 @@ export enum UserAuthTypeEnum {
findPassword = 'findPassword'
}
-export const fileImgs = [
- { suffix: 'pdf', src: '/imgs/files/pdf.svg' },
- { suffix: 'csv', src: '/imgs/files/csv.svg' },
- { suffix: '(doc|docs)', src: '/imgs/files/doc.svg' },
- { suffix: 'txt', src: '/imgs/files/txt.svg' },
- { suffix: 'md', src: '/imgs/files/markdown.svg' },
- { suffix: '.', src: '/imgs/files/file.svg' }
-];
-
export enum TrackEventName {
windowError = 'windowError',
pageError = 'pageError',
diff --git a/projects/app/src/constants/dataset.ts b/projects/app/src/constants/dataset.ts
index ee15e954f..81ffff6e8 100644
--- a/projects/app/src/constants/dataset.ts
+++ b/projects/app/src/constants/dataset.ts
@@ -34,4 +34,3 @@ export const KbTypeMap = {
};
export const FolderAvatarSrc = '/imgs/files/folder.svg';
-export const OtherFileId = 'other';
diff --git a/projects/app/src/pages/api/admin/initv447.ts b/projects/app/src/pages/api/admin/initv447.ts
new file mode 100644
index 000000000..5d0ef3d18
--- /dev/null
+++ b/projects/app/src/pages/api/admin/initv447.ts
@@ -0,0 +1,109 @@
+import type { NextApiRequest, NextApiResponse } from 'next';
+import { jsonRes } from '@/service/response';
+import { authUser } from '@/service/utils/auth';
+import { connectToDatabase } from '@/service/mongo';
+import { PgClient } from '@/service/pg';
+import { PgDatasetTableName } from '@/constants/plugin';
+import { DatasetSpecialIdEnum } from '@fastgpt/core/dataset/constant';
+import mongoose, { Types } from 'mongoose';
+import { delay } from '@/utils/tools';
+
+export default async function handler(req: NextApiRequest, res: NextApiResponse) {
+ let initFileIds: string[] = [];
+ try {
+ const { limit = 100 } = req.body;
+ await connectToDatabase();
+ await authUser({ req, authRoot: true });
+
+ console.log('add index');
+ await PgClient.query(
+ `
+ ALTER TABLE modeldata
+ ALTER COLUMN source TYPE VARCHAR(256),
+ ALTER COLUMN file_id TYPE VARCHAR(256);
+ CREATE INDEX IF NOT EXISTS modelData_fileId_index ON modeldata (file_id);
+ `
+ );
+ console.log('index success');
+ console.log('count rows');
+ // 去重获取 fileId
+ const { rows } = await PgClient.query(`SELECT DISTINCT file_id
+ FROM ${PgDatasetTableName} WHERE file_id IS NOT NULL AND file_id != '';
+ `);
+ console.log('count rows success', rows.length);
+ console.log('start filter');
+ for (let i = 0; i < rows.length; i += limit) {
+ await init(rows.slice(i, i + limit), initFileIds);
+ console.log(i);
+ }
+ console.log('filter success');
+ console.log('start update');
+
+ for (let i = 0; i < initFileIds.length; i++) {
+ await PgClient.query(`UPDATE ${PgDatasetTableName}
+ SET file_id = '${DatasetSpecialIdEnum.manual}'
+ WHERE file_id = '${initFileIds[i]}'`);
+ console.log('update: ', initFileIds[i]);
+ }
+
+ const { rows: emptyIds } = await PgClient.query(
+ `SELECT id FROM ${PgDatasetTableName} WHERE file_id IS NULL OR file_id=''`
+ );
+ console.log(emptyIds.length);
+
+ await delay(5000);
+
+ async function start(start: number) {
+ for (let i = start; i < emptyIds.length; i += limit) {
+ await PgClient.query(`UPDATE ${PgDatasetTableName}
+ SET file_id = '${DatasetSpecialIdEnum.manual}'
+ WHERE id = '${emptyIds[i].id}'`);
+ console.log('update: ', i, emptyIds[i].id);
+ }
+ }
+ for (let i = 0; i < limit; i++) {
+ start(i);
+ }
+
+ // await PgClient.query(
+ // `UPDATE ${PgDatasetTableName}
+ // SET file_id = '${DatasetSpecialIdEnum.manual}'
+ // WHERE file_id IS NULL OR file_id = ''`
+ // );
+
+ console.log('update success');
+
+ jsonRes(res, {
+ data: {
+ empty: emptyIds.length
+ }
+ });
+ } catch (error) {
+ jsonRes(res, {
+ code: 500,
+ error
+ });
+ }
+}
+
+async function init(rows: any[], initFileIds: string[]) {
+ const collection = mongoose.connection.db.collection(`dataset.files`);
+
+ /* 遍历所有的 fileId,去找有没有对应的文件,没有的话则改成manual */
+ const updateResult = await Promise.allSettled(
+ rows.map(async (item) => {
+ // 找下是否有对应的文件
+ const file = await collection.findOne({
+ _id: new Types.ObjectId(item.file_id)
+ });
+
+ if (file) return '';
+ // 没有文件的,改成manual
+ initFileIds.push(item.file_id);
+
+ return item.file_id;
+ })
+ );
+ // @ts-ignore
+ console.log(updateResult.filter((item) => item?.value).length);
+}
diff --git a/projects/app/src/pages/api/core/dataset/data/exportAll.ts b/projects/app/src/pages/api/core/dataset/data/exportAll.ts
index 3652fb12b..ddb5efe91 100644
--- a/projects/app/src/pages/api/core/dataset/data/exportAll.ts
+++ b/projects/app/src/pages/api/core/dataset/data/exportAll.ts
@@ -91,6 +91,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
if (res.closed) {
return stream.destroy();
}
+ q = q.replace(/"/g, '""');
+ a = a.replace(/"/g, '""');
+ source = source?.replace(/"/g, '""');
+
write(`\n"${q}","${a || ''}","${source || ''}"`);
});
// finish
diff --git a/projects/app/src/pages/api/core/dataset/data/getDataList.ts b/projects/app/src/pages/api/core/dataset/data/getDataList.ts
index 8314a4717..a09e2bd7d 100644
--- a/projects/app/src/pages/api/core/dataset/data/getDataList.ts
+++ b/projects/app/src/pages/api/core/dataset/data/getDataList.ts
@@ -4,7 +4,6 @@ import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
-import { OtherFileId } from '@/constants/dataset';
import type { PgDataItemType } from '@/types/core/dataset/data';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
@@ -36,15 +35,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
['user_id', userId],
'AND',
['kb_id', kbId],
- ...(fileId
- ? fileId === OtherFileId
- ? ["AND (file_id IS NULL OR file_id = '')"]
- : ['AND', ['file_id', fileId]]
- : []),
+ 'AND',
+ ['file_id', fileId],
...(searchText
? [
'AND',
- `(q LIKE '%${searchText}%' OR a LIKE '%${searchText}%' OR source LIKE '%${searchText}%')`
+ `(q ILIKE '%${searchText}%' OR a ILIKE '%${searchText}%' OR source ILIKE '%${searchText}%')`
]
: [])
];
diff --git a/projects/app/src/pages/api/core/dataset/data/insertData.ts b/projects/app/src/pages/api/core/dataset/data/insertData.ts
index 23e351045..dace05c99 100644
--- a/projects/app/src/pages/api/core/dataset/data/insertData.ts
+++ b/projects/app/src/pages/api/core/dataset/data/insertData.ts
@@ -1,3 +1,7 @@
+/*
+ insert one data to dataset (immediately insert)
+ manual input or mark data
+*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
@@ -11,7 +15,6 @@ import { DatasetDataItemType } from '@/types/core/dataset/data';
import { countPromptTokens } from '@/utils/common/tiktoken';
export type Props = {
- billId?: string;
kbId: string;
data: DatasetDataItemType;
};
@@ -40,7 +43,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
export async function getVectorAndInsertDataset(
props: Props & { userId: string }
): Promise {
- const { kbId, data, userId, billId } = props;
+ const { kbId, data, userId } = props;
if (!kbId || !data?.q) {
return Promise.reject('缺少参数');
}
@@ -61,7 +64,7 @@ export async function getVectorAndInsertDataset(
const { rows: existsRows } = await PgClient.query(`
SELECT COUNT(*) > 0 AS exists
FROM ${PgDatasetTableName}
- WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND user_id='${userId}' AND kb_id='${kbId}'
+ WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND user_id='${userId}' AND file_id='${data.file_id}' AND kb_id='${kbId}'
`);
const exists = existsRows[0]?.exists || false;
@@ -72,8 +75,7 @@ export async function getVectorAndInsertDataset(
const { vectors } = await getVector({
model: kb.vectorModel,
input: [q],
- userId,
- billId
+ userId
});
const response = await insertData2Dataset({
diff --git a/projects/app/src/pages/api/core/dataset/file/delById.ts b/projects/app/src/pages/api/core/dataset/file/delById.ts
index cb95c5812..dbc82a72d 100644
--- a/projects/app/src/pages/api/core/dataset/file/delById.ts
+++ b/projects/app/src/pages/api/core/dataset/file/delById.ts
@@ -6,7 +6,7 @@ import { GridFSStorage } from '@/service/lib/gridfs';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
import { Types } from 'mongoose';
-import { OtherFileId } from '@/constants/dataset';
+import { isSpecialFileId } from '@fastgpt/core/dataset/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
@@ -22,14 +22,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const { userId } = await authUser({ req, authToken: true });
// other data. Delete only vector data
- if (fileId === OtherFileId) {
+ if (isSpecialFileId(fileId)) {
await PgClient.delete(PgDatasetTableName, {
- where: [
- ['user_id', userId],
- 'AND',
- ['kb_id', kbId],
- "AND (file_id IS NULL OR file_id = '')"
- ]
+ where: [['user_id', userId], 'AND', ['kb_id', kbId], 'AND', ['file_id', fileId]]
});
} else {
// auth file
@@ -48,7 +43,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
file_id: fileId
});
- // delete file
+ // delete file
await bucket.delete(new Types.ObjectId(fileId));
}
diff --git a/projects/app/src/pages/api/core/dataset/file/detail.ts b/projects/app/src/pages/api/core/dataset/file/detail.ts
index dc3567040..115800b4e 100644
--- a/projects/app/src/pages/api/core/dataset/file/detail.ts
+++ b/projects/app/src/pages/api/core/dataset/file/detail.ts
@@ -3,8 +3,12 @@ import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
-import { OtherFileId } from '@/constants/dataset';
+import { datasetSpecialIdMap } from '@fastgpt/core/dataset/constant';
+import { datasetSpecialIds } from '@fastgpt/core/dataset/constant';
import type { GSFileInfoType } from '@/types/common/file';
+import { strIsLink } from '@fastgpt/common/tools/str';
+import { PgClient } from '@/service/pg';
+import { PgDatasetTableName } from '@/constants/plugin';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
@@ -14,12 +18,32 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// 凭证校验
const { userId } = await authUser({ req, authToken: true });
- if (fileId === OtherFileId) {
+ // manual, mark
+ if (datasetSpecialIds.includes(fileId)) {
return jsonRes(res, {
data: {
- id: OtherFileId,
+ id: fileId,
size: 0,
- filename: 'kb.Other Data',
+ // @ts-ignore
+ filename: datasetSpecialIdMap[fileId]?.name || fileId,
+ uploadDate: new Date(),
+ encoding: '',
+ contentType: ''
+ }
+ });
+ }
+ // link file
+ if (strIsLink(fileId)) {
+ const { rows } = await PgClient.select(PgDatasetTableName, {
+ where: [['user_id', userId], 'AND', ['file_id', fileId]],
+ limit: 1,
+ fields: ['source']
+ });
+ return jsonRes(res, {
+ data: {
+ id: fileId,
+ size: 0,
+ filename: rows[0]?.source || fileId,
uploadDate: new Date(),
encoding: '',
contentType: ''
diff --git a/projects/app/src/pages/api/core/dataset/file/list.ts b/projects/app/src/pages/api/core/dataset/file/list.ts
index 536e244a7..49037fa61 100644
--- a/projects/app/src/pages/api/core/dataset/file/list.ts
+++ b/projects/app/src/pages/api/core/dataset/file/list.ts
@@ -5,7 +5,14 @@ import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
-import { FileStatusEnum, OtherFileId } from '@/constants/dataset';
+import { FileStatusEnum } from '@/constants/dataset';
+import { strIsLink } from '@fastgpt/common/tools/str';
+import {
+ DatasetSpecialIdEnum,
+ datasetSpecialIdMap,
+ datasetSpecialIds
+} from '@fastgpt/core/dataset/constant';
+import { Types } from 'mongoose';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
@@ -22,57 +29,106 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// 凭证校验
const { userId } = await authUser({ req, authToken: true });
+ // select and count same file_id data, exclude special id
+ const pgWhere = `user_id = '${userId}' AND kb_id = '${kbId}' ${datasetSpecialIds
+ .map((item) => `AND file_id!='${item}'`)
+ .join(' ')}
+ ${searchText ? `AND source ILIKE '%${searchText}%'` : ''}`;
+
+ const [{ rows }, { rowCount: total }] = await Promise.all([
+ PgClient.query(`SELECT file_id, COUNT(*) AS count
+ FROM ${PgDatasetTableName}
+ where ${pgWhere}
+ GROUP BY file_id
+ ORDER BY file_id DESC
+ LIMIT ${pageSize} OFFSET ${(pageNum - 1) * pageSize};
+ `),
+ PgClient.query(`SELECT DISTINCT file_id
+ FROM ${PgDatasetTableName}
+ where ${pgWhere}
+ `)
+ ]);
+
// find files
const gridFs = new GridFSStorage('dataset', userId);
const collection = gridFs.Collection();
- const mongoWhere = {
- ['metadata.kbId']: kbId,
- ['metadata.userId']: userId,
- ['metadata.datasetUsed']: true,
- ...(searchText && { filename: { $regex: searchText } })
- };
- const [files, total] = await Promise.all([
- collection
- .find(mongoWhere, {
- projection: {
- _id: 1,
- filename: 1,
- uploadDate: 1,
- length: 1
- }
- })
- .skip((pageNum - 1) * pageSize)
- .limit(pageSize)
- .sort({ uploadDate: -1 })
- .toArray(),
- collection.countDocuments(mongoWhere)
- ]);
-
- async function GetOtherData() {
- return {
- id: OtherFileId,
- size: 0,
- filename: 'kb.Other Data',
- uploadTime: new Date(),
- status: (await TrainingData.findOne({ userId, kbId, file_id: '' }))
- ? FileStatusEnum.embedding
- : FileStatusEnum.ready,
- chunkLength: await PgClient.count(PgDatasetTableName, {
- fields: ['id'],
- where: [
- ['user_id', userId],
- 'AND',
- ['kb_id', kbId],
- "AND (file_id IS NULL OR file_id = '')"
- ]
- })
- };
+ async function getSpecialData() {
+ if (pageNum !== 1) return [];
+ return [
+ {
+ id: DatasetSpecialIdEnum.manual,
+ size: 0,
+ filename: datasetSpecialIdMap[DatasetSpecialIdEnum.manual].name,
+ uploadTime: new Date(),
+ status: FileStatusEnum.ready,
+ chunkLength: await PgClient.count(PgDatasetTableName, {
+ fields: ['id'],
+ where: [
+ ['user_id', userId],
+ 'AND',
+ ['file_id', DatasetSpecialIdEnum.manual],
+ 'AND',
+ ['kb_id', kbId]
+ ]
+ })
+ },
+ {
+ id: DatasetSpecialIdEnum.mark,
+ size: 0,
+ filename: datasetSpecialIdMap[DatasetSpecialIdEnum.mark].name,
+ uploadTime: new Date(),
+ status: FileStatusEnum.ready,
+ chunkLength: await PgClient.count(PgDatasetTableName, {
+ fields: ['id'],
+ where: [
+ ['user_id', userId],
+ 'AND',
+ ['file_id', DatasetSpecialIdEnum.mark],
+ 'AND',
+ ['kb_id', kbId]
+ ]
+ })
+ }
+ ];
}
const data = await Promise.all([
- GetOtherData(),
- ...files.map(async (file) => {
+ getSpecialData(),
+ ...rows.map(async (row) => {
+ // link data
+ if (strIsLink(row.file_id)) {
+ const { rows } = await PgClient.select(PgDatasetTableName, {
+ where: [['user_id', userId], 'AND', ['file_id', row.file_id]],
+ limit: 1,
+ fields: ['source']
+ });
+ return {
+ id: row.file_id,
+ size: 0,
+ filename: rows[0]?.source || row.file_id,
+ uploadTime: new Date(),
+ status: FileStatusEnum.ready,
+ chunkLength: row.count
+ };
+ }
+ // file data
+ const file = await collection.findOne(
+ {
+ _id: new Types.ObjectId(row.file_id),
+ ['metadata.userId']: userId,
+ ['metadata.kbId']: kbId
+ },
+ {
+ projection: {
+ _id: 1,
+ filename: 1,
+ uploadDate: 1,
+ length: 1
+ }
+ }
+ );
+ if (!file) return null;
return {
id: String(file._id),
size: file.length,
@@ -81,16 +137,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
status: (await TrainingData.findOne({ userId, kbId, file_id: file._id }))
? FileStatusEnum.embedding
: FileStatusEnum.ready,
- chunkLength: await PgClient.count(PgDatasetTableName, {
- fields: ['id'],
- where: [
- ['user_id', userId],
- 'AND',
- ['kb_id', kbId],
- 'AND',
- ['file_id', String(file._id)]
- ]
- })
+ chunkLength: row.count
};
})
]);
@@ -99,7 +146,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
data: {
pageNum,
pageSize,
- data: data.flat(),
+ data: data.flat().filter((item) => item),
total
}
});
diff --git a/projects/app/src/pages/api/core/dataset/file/update.ts b/projects/app/src/pages/api/core/dataset/file/update.ts
index df95a327d..eeb2947ef 100644
--- a/projects/app/src/pages/api/core/dataset/file/update.ts
+++ b/projects/app/src/pages/api/core/dataset/file/update.ts
@@ -8,6 +8,7 @@ import { Types } from 'mongoose';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
import { addLog } from '@/service/utils/tools';
+import { strIsLink } from '@fastgpt/common/tools/str';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
@@ -19,20 +20,22 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const gridFs = new GridFSStorage('dataset', userId);
const collection = gridFs.Collection();
- await collection.findOneAndUpdate(
- {
- _id: new Types.ObjectId(id)
- },
- {
- $set: {
- ...(name && { filename: name }),
- ...(datasetUsed && { ['metadata.datasetUsed']: datasetUsed })
+ if (id.length === 24 && !strIsLink(id)) {
+ await collection.findOneAndUpdate(
+ {
+ _id: new Types.ObjectId(id)
+ },
+ {
+ $set: {
+ ...(name && { filename: name }),
+ ...(datasetUsed && { ['metadata.datasetUsed']: datasetUsed })
+ }
}
- }
- );
+ );
+ }
// data source
- updateDatasetSource({
+ await updateDatasetSource({
fileId: id,
userId,
name
diff --git a/projects/app/src/pages/kb/detail/components/DataCard.tsx b/projects/app/src/pages/kb/detail/components/DataCard.tsx
index f19f0c55c..79195638d 100644
--- a/projects/app/src/pages/kb/detail/components/DataCard.tsx
+++ b/projects/app/src/pages/kb/detail/components/DataCard.tsx
@@ -1,31 +1,28 @@
import React, { useCallback, useState, useRef, useMemo } from 'react';
-import { Box, Card, IconButton, Flex, Grid, Image } from '@chakra-ui/react';
+import { Box, Card, IconButton, Flex, Grid, Image, Button } from '@chakra-ui/react';
import type { PgDataItemType } from '@/types/core/dataset/data';
import { usePagination } from '@/hooks/usePagination';
-import {
- getDatasetDataList,
- delOneDatasetDataById,
- getTrainingData
-} from '@/api/core/dataset/data';
+import { getDatasetDataList, delOneDatasetDataById } from '@/api/core/dataset/data';
import { getFileInfoById } from '@/api/core/dataset/file';
import { DeleteIcon, RepeatIcon } from '@chakra-ui/icons';
import { useQuery } from '@tanstack/react-query';
import { useToast } from '@/hooks/useToast';
-import InputModal, { FormData as InputDataType } from './InputDataModal';
+import InputModal, { FormData as InputDataType, RawFileText } from './InputDataModal';
import { debounce } from 'lodash';
import { getErrText } from '@/utils/tools';
import { useConfirm } from '@/hooks/useConfirm';
import { useTranslation } from 'react-i18next';
import { useRouter } from 'next/router';
import MyIcon from '@/components/Icon';
-import MyTooltip from '@/components/MyTooltip';
import MyInput from '@/components/MyInput';
-import { fileImgs } from '@/constants/common';
+import { useLoading } from '@/hooks/useLoading';
+import { getFileIcon, getSpecialFileIcon } from '@fastgpt/common/tools/file';
const DataCard = ({ kbId }: { kbId: string }) => {
const BoxRef = useRef(null);
const lastSearch = useRef('');
const router = useRouter();
+ const { Loading, setIsLoading } = useLoading({ defaultLoading: true });
const { fileId = '' } = router.query as { fileId: string };
const { t } = useTranslation();
const [searchText, setSearchText] = useState('');
@@ -37,7 +34,6 @@ const DataCard = ({ kbId }: { kbId: string }) => {
const {
data: kbDataList,
- isLoading,
Pagination,
total,
getData,
@@ -52,6 +48,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
fileId
},
onChange() {
+ setIsLoading(false);
if (BoxRef.current) {
BoxRef.current.scrollTop = 0;
}
@@ -72,9 +69,8 @@ const DataCard = ({ kbId }: { kbId: string }) => {
// get file info
const { data: fileInfo } = useQuery(['getFileInfo', fileId], () => getFileInfoById(fileId));
const fileIcon = useMemo(
- () =>
- fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(fileInfo?.filename || ''))?.src,
- [fileInfo?.filename]
+ () => getSpecialFileIcon(fileInfo?.id) || getFileIcon(fileInfo?.filename),
+ [fileInfo?.filename, fileInfo?.id]
);
return (
@@ -82,10 +78,9 @@ const DataCard = ({ kbId }: { kbId: string }) => {
}
+ icon={}
bg={'white'}
boxShadow={'1px 1px 9px rgba(0,0,0,0.15)'}
- h={'28px'}
size={'sm'}
borderRadius={'50%'}
aria-label={''}
@@ -98,30 +93,34 @@ const DataCard = ({ kbId }: { kbId: string }) => {
})
}
/>
-
+
- {t(fileInfo?.filename || 'Filename')}
+
-
- }
- size={['sm', 'md']}
- aria-label={'refresh'}
- variant={'base'}
- isLoading={isLoading}
- onClick={() => {
- getData(pageNum);
- getTrainingData({ kbId, init: true });
- }}
- />
-
+
@@ -249,6 +248,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
/>
)}
+
);
};
diff --git a/projects/app/src/pages/kb/detail/components/FileCard.tsx b/projects/app/src/pages/kb/detail/components/FileCard.tsx
index 9d343f8ce..9bf6b77e3 100644
--- a/projects/app/src/pages/kb/detail/components/FileCard.tsx
+++ b/projects/app/src/pages/kb/detail/components/FileCard.tsx
@@ -22,16 +22,17 @@ import { useTranslation } from 'react-i18next';
import MyIcon from '@/components/Icon';
import MyInput from '@/components/MyInput';
import dayjs from 'dayjs';
-import { fileImgs } from '@/constants/common';
import { useRequest } from '@/hooks/useRequest';
import { useLoading } from '@/hooks/useLoading';
-import { FileStatusEnum, OtherFileId } from '@/constants/dataset';
+import { FileStatusEnum } from '@/constants/dataset';
import { useRouter } from 'next/router';
import { usePagination } from '@/hooks/usePagination';
import type { DatasetFileItemType } from '@/types/core/dataset/file';
import { useGlobalStore } from '@/store/global';
import MyMenu from '@/components/MyMenu';
import { useEditTitle } from '@/hooks/useEditTitle';
+import { datasetSpecialIds } from '@fastgpt/core/dataset/constant';
+import { getFileIcon, getSpecialFileIcon } from '@fastgpt/common/tools/file';
const FileCard = ({ kbId }: { kbId: string }) => {
const BoxRef = useRef(null);
@@ -79,10 +80,14 @@ const FileCard = ({ kbId }: { kbId: string }) => {
// add file icon
const formatFiles = useMemo(
() =>
- files.map((file) => ({
- ...file,
- icon: fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(file.filename))?.src
- })),
+ files.map((file) => {
+ const icon = getSpecialFileIcon(file.id) || getFileIcon(file.filename);
+
+ return {
+ ...file,
+ icon
+ };
+ }),
[files]
);
@@ -114,8 +119,8 @@ const FileCard = ({ kbId }: { kbId: string }) => {
onSettled() {
setLoading(false);
},
- successToast: t('common.Delete Success'),
- errorToast: t('common.Delete Failed')
+ successToast: t('common.Rename Success'),
+ errorToast: t('common.Rename Failed')
});
const { onOpenModal, EditModal: EditTitleModal } = useEditTitle({
@@ -135,11 +140,15 @@ const FileCard = ({ kbId }: { kbId: string }) => {
// training data
const { data: { qaListLen = 0, vectorListLen = 0 } = {}, refetch: refetchTrainingData } =
- useQuery(['getModelSplitDataList', kbId], () => getTrainingData({ kbId, init: false }), {
- onError(err) {
- console.log(err);
+ useQuery(
+ ['getModelSplitDataList', kbId],
+      () => getTrainingData({ kbId, init: false }),
+ {
+ onError(err) {
+ console.log(err);
+ }
}
- });
+ );
useQuery(
['refetchTrainingData', kbId],
@@ -279,7 +288,7 @@ const FileCard = ({ kbId }: { kbId: string }) => {
}
menuList={[
- ...(file.id !== OtherFileId
+ ...(!datasetSpecialIds.includes(file.id)
? [
{
child: (
diff --git a/projects/app/src/pages/kb/detail/components/Import/CreateFileModal.tsx b/projects/app/src/pages/kb/detail/components/Import/CreateFileModal.tsx
index e9b6e8988..545b519af 100644
--- a/projects/app/src/pages/kb/detail/components/Import/CreateFileModal.tsx
+++ b/projects/app/src/pages/kb/detail/components/Import/CreateFileModal.tsx
@@ -3,13 +3,14 @@ import { useTranslation } from 'next-i18next';
import MyModal from '@/components/MyModal';
import { Box, Input, Textarea, ModalBody, ModalFooter, Button } from '@chakra-ui/react';
import { useForm } from 'react-hook-form';
+import { useRequest } from '@/hooks/useRequest';
const CreateFileModal = ({
onClose,
onSuccess
}: {
onClose: () => void;
- onSuccess: (e: { filename: string; content: string }) => void;
+  onSuccess: (e: { filename: string; content: string }) => Promise<void>;
}) => {
const { t } = useTranslation();
const { register, handleSubmit } = useForm({
@@ -19,6 +20,13 @@ const CreateFileModal = ({
}
});
+ const { mutate, isLoading } = useRequest({
+ mutationFn: () => handleSubmit(onSuccess)(),
+ onSuccess: () => {
+ onClose();
+ }
+ });
+
return (
@@ -47,12 +55,7 @@ const CreateFileModal = ({
-