Optimize the file storage structure of the knowledge base (#386)

This commit is contained in:
Archer
2023-10-10 22:41:05 +08:00
committed by GitHub
parent 29d152784f
commit d0041a98b4
41 changed files with 591 additions and 231 deletions

View File

@@ -2,7 +2,7 @@
"editor.formatOnSave": true,
"editor.mouseWheelZoom": true,
"typescript.tsdk": "node_modules/typescript/lib",
"editor.defaultFormatter": "esbenp.prettier-vscode",
"prettier.prettierPath": "./node_modules/prettier",
"i18n-ally.localesPaths": [
"projects/app/public/locales"
],

View File

@@ -3,7 +3,7 @@
## 本地运行
1. 安装 go 语言环境。
2. 安装 hugo。 [二进制下载](https://github.com/gohugoio/hugo/releases/tag/v0.117.0)
2. 安装 hugo。 [二进制下载](https://github.com/gohugoio/hugo/releases/tag/v0.117.0),注意需要安装 extended 版本。
3. cd docSite
4. hugo serve
5. 访问 http://localhost:1313

View File

@@ -84,6 +84,14 @@ weight: 520
"maxToken": 16000,
"price": 0,
"prompt": ""
},
"QGModel": { // 生成下一步指引模型
"model": "gpt-3.5-turbo",
"name": "GPT35-4k",
"maxToken": 4000,
"price": 0,
"prompt": "",
"functionCall": false
}
}
```

View File

@@ -0,0 +1,8 @@
---
weight: 540
title: "设计方案"
description: "FastGPT 部分设计方案"
icon: public
draft: false
images: []
---

View File

@@ -0,0 +1,25 @@
---
weight: 541
title: "数据集"
description: "FastGPT 数据集中文件与数据的设计方案"
icon: dataset
draft: false
images: []
---
## 文件与数据的关系
在 FastGPT 中,文件会通过 MongoDB 的 FS 存储,而具体的数据会通过 PostgreSQL 存储。PG 中的数据会有一列 file_id,关联对应的文件。考虑到旧版本的兼容,以及手动输入、标注数据等,我们给 file_id 增加了一些特殊的值,如下:
- manual: 手动输入
- mark: 手动标注的数据
注意:file_id 仅在插入数据时会写入,变更时无法修改。
## 文件导入流程
1. 上传文件到 MongoDB 的 FS 中,获取 file_id,此时文件标记为 `unused` 状态
2. 浏览器解析文件,获取对应的文本和 chunk
3. 给每个 chunk 打上 file_id
4. 点击上传数据:将文件的状态改为 `used`,并将数据推送到 mongo `training` 表中等待训练
5. 由训练线程从 mongo 中取数据,并在获取向量后插入到 pg。

View File

@@ -0,0 +1,29 @@
---
title: 'V4.4.7'
description: 'FastGPT V4.4.7 更新(需执行升级脚本)'
icon: 'upgrade'
draft: false
toc: true
weight: 840
---
## 执行初始化 API
发起 1 个 HTTP 请求({{rootkey}} 替换成环境变量里的 `rootkey`,{{host}} 替换成自己域名)
1. https://xxxxx/api/admin/initv447
```bash
curl --location --request POST 'https://{{host}}/api/admin/initv447' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json'
```
初始化 pg 索引以及将 file_id 中空对象转成 manual 对象。如果数据多,可能需要较长时间,可以通过日志查看进度。
## 功能介绍
### FastGPT V4.4.7
1. 优化了数据库文件 crud。
2. 兼容链接读取,作为 source。

View File

@@ -0,0 +1,23 @@
import { strIsLink } from './str';
/**
 * Icon lookup table for dataset files.
 *
 * `suffix` is a regular-expression fragment describing the file extension
 * (without the leading dot). The final `'.'` entry is a catch-all that matches
 * any non-empty name and supplies the generic file icon.
 */
export const fileImgs = [
  { suffix: 'pdf', src: '/imgs/files/pdf.svg' },
  { suffix: 'csv', src: '/imgs/files/csv.svg' },
  // Word documents: both the legacy `.doc` and the modern `.docx` extension.
  { suffix: '(doc|docx)', src: '/imgs/files/doc.svg' },
  { suffix: 'txt', src: '/imgs/files/txt.svg' },
  { suffix: 'md', src: '/imgs/files/markdown.svg' },
  { suffix: '.', src: '/imgs/files/file.svg' }
];

/**
 * Resolve the icon path for a file name.
 *
 * The match is performed against the real extension (the text after the last
 * dot), case-insensitively, so a name such as `pdf_list.csv` resolves to the
 * csv icon rather than the pdf icon.
 *
 * @param name - File name, with or without an extension.
 * @returns The icon path; the generic file icon when the extension is not
 *          recognised; `undefined` when `name` is empty.
 */
export function getFileIcon(name = '') {
  const dotIndex = name.lastIndexOf('.');
  const extension = dotIndex >= 0 ? name.slice(dotIndex + 1) : '';

  // Anchor the pattern so it must cover the whole extension, not a substring of the name.
  const byExtension = fileImgs.find(
    (item) => item.suffix !== '.' && new RegExp(`^(?:${item.suffix})$`, 'i').test(extension)
  );
  if (byExtension) return byExtension.src;

  // Catch-all entry: any non-empty name still gets the generic icon.
  const fallback = fileImgs.find((item) => item.suffix === '.');
  return fallback && new RegExp(fallback.suffix).test(name) ? fallback.src : undefined;
}
/**
 * Resolve the icon for a "special" dataset source that is not an uploaded file.
 *
 * @param name - The special id (`manual`, `mark`) or a link string.
 * @returns The matching icon path, or `undefined` when `name` is neither a
 *          special id nor recognised as a link by `strIsLink`.
 */
export function getSpecialFileIcon(name = '') {
  switch (name) {
    case 'manual':
      return '/imgs/files/manual.svg';
    case 'mark':
      return '/imgs/files/mark.svg';
  }
  return strIsLink(name) ? '/imgs/files/link.svg' : undefined;
}

View File

@@ -0,0 +1,5 @@
/**
 * Tell whether a string looks like a link.
 *
 * A value counts as a link when it starts with `http://`, `https://`, `://`,
 * `www.` or `/`, is followed by at least two further characters, and contains
 * no whitespace.
 *
 * @param str - Candidate string; empty or missing values are never links.
 * @returns `true` when the string matches the link pattern, otherwise `false`.
 */
export function strIsLink(str?: string) {
  const linkPattern = /^((http|https)?:\/\/|www\.|\/)[^\s/$.?#].[^\s]*$/i;
  return str ? linkPattern.test(str) : false;
}

View File

@@ -0,0 +1,15 @@
/**
 * Reserved `file_id` values for dataset rows that do not originate from an
 * uploaded file.
 */
export enum DatasetSpecialIdEnum {
  /** Data typed in by hand. */
  manual = 'manual',
  /** Data created through manual marking. */
  mark = 'mark'
}

/**
 * Display metadata for each special id. Both `name` and `sourceName` are
 * i18n keys (see the `kb.*` entries in the locale files), not literal labels.
 */
export const datasetSpecialIdMap = {
  [DatasetSpecialIdEnum.manual]: { name: 'kb.Manual Data', sourceName: 'kb.Manual Input' },
  [DatasetSpecialIdEnum.mark]: { name: 'kb.Mark Data', sourceName: 'kb.Manual Mark' }
};

/** Every special id as a plain string list, in enum declaration order. */
export const datasetSpecialIds: string[] = Object.values(DatasetSpecialIdEnum);

View File

@@ -0,0 +1,8 @@
import { datasetSpecialIds } from './constant';
import { strIsLink } from '@fastgpt/common/tools/str';
/**
 * Tell whether a `file_id` is a special id rather than a stored file.
 *
 * @param id - The `file_id` to classify.
 * @returns `true` when `id` is one of `datasetSpecialIds` or is recognised as
 *          a link by `strIsLink`; otherwise `false`.
 */
export function isSpecialFileId(id: string) {
  return datasetSpecialIds.includes(id) || strIsLink(id);
}

View File

@@ -3,7 +3,9 @@
"version": "1.0.0",
"dependencies": {
"openai": "^3.3.0",
"tunnel": "^0.0.6"
"tunnel": "^0.0.6",
"@fastgpt/common": "workspace:*",
"@fastgpt/support": "workspace:*"
},
"devDependencies": {
"@types/tunnel": "^0.0.4"

View File

@@ -1,4 +1,7 @@
{
"name": "@fastgpt/support",
"version": "1.0.0"
"version": "1.0.0",
"dependencies": {
"@fastgpt/common": "workspace:*"
}
}

12
pnpm-lock.yaml generated
View File

@@ -31,6 +31,12 @@ importers:
packages/core:
dependencies:
'@fastgpt/common':
specifier: workspace:*
version: link:../common
'@fastgpt/support':
specifier: workspace:*
version: link:../support
openai:
specifier: ^3.3.0
version: registry.npmmirror.com/openai@3.3.0
@@ -42,7 +48,11 @@ importers:
specifier: ^0.0.4
version: registry.npmmirror.com/@types/tunnel@0.0.4
packages/support: {}
packages/support:
dependencies:
'@fastgpt/common':
specifier: workspace:*
version: link:../common
projects/app:
dependencies:

View File

@@ -1,6 +1,6 @@
{
"name": "app",
"version": "4.4.6",
"version": "4.4.7",
"private": false,
"scripts": {
"dev": "next dev",

View File

@@ -9,6 +9,6 @@
- [计费规则](https://doc.fastgpt.run/docs/pricing/)
**其他问题**
| 交流群 | 小助手 |
| ----------------------- | -------------------- |
| ![](https://otnvvf-imgs.oss.laf.run/wxqun300.jpg) | ![](https://otnvvf-imgs.oss.laf.run/wx300.jpg) |
| 添加小助手进入交流群 |
| ----------------------- |
| ![](https://otnvvf-imgs.oss.laf.run/wx300.jpg) |

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1696841646763" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="2367" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="128"><path d="M185.78023917 244.70891583a668.28725918 668.28725918 0 0 0 121.17266584 53.53692476 665.15318203 665.15318203 0 0 0-34.64702231 183.62637022H91.72167743a419.67636227 419.67636227 0 0 1 94.05856174-237.16329498z m227.58241722-142.44192964a672.20323745 672.20323745 0 0 0-84.326223 139.79071526 609.56829772 609.56829772 0 0 1-99.6923923-42.47897884 420.36764571 420.36764571 0 0 1 184.0186153-97.31173642z m381.2622336 97.31044188a607.36887719 607.36887719 0 0 1-99.69109776 42.48027338 672.20323745 672.20323745 0 0 0-84.326223-139.79071526c69.74454391 16.75132524 132.56071902 50.73424941 184.0186153 97.31044188z m43.56509646 45.13148776a419.67636227 419.67636227 0 0 1 94.08704158 237.16329498h-180.52336198a664.06835895 664.06835895 0 0 0-34.70657106-183.62637022 668.28725918 668.28725918 0 0 0 121.17266584-53.53692476z m0 534.58216834a668.28725918 668.28725918 0 0 0-121.17266583-53.53692476 663.64763402 663.64763402 0 0 0 34.70657106-183.62637022h180.52336197a419.58574459 419.58574459 0 0 1-94.0572672 237.16329498zM694.93379223 781.91122963c34.70657106 11.23788674 68.02798617 25.5179384 99.72216668 42.48027339a420.21747927 420.21747927 0 0 1-184.04838968 97.31044187A671.42004179 671.42004179 0 0 0 694.96356661 781.91122963z m-213.06158142-28.07853511v155.75884484a607.60966131 607.60966131 0 0 1-93.63524773-143.31574298 605.26136889 605.26136889 0 0 1 93.63524773-12.44310186z m0-211.70490533v151.54123916a662.9537615 662.9537615 0 0 0-116.59258944 15.96683503 600.07803765 600.07803765 0 0 1-32.74922919-167.50807419h149.34181863z m-93.63524773-284.43335995a608.06145517 608.06145517 0 0 1 93.63524773-143.31574298v155.75884484a594.47398147 
594.47398147 0 0 1-93.63524773-12.44310186z m93.63524773 72.60676804v151.57101353h-149.31204425c3.0136851-58.44710843 14.19072853-114.63395303 32.68968043-167.50807419 37.748736 8.6759955 76.70398167 14.10011085 116.62236382 15.96683503z m60.25557838-60.16366618v-155.75884484a607.00770101 607.00770101 0 0 1 93.60547335 143.31574298c-30.42812208 6.50764389-61.67180325 10.78609289-93.60547335 12.44310186z m0 211.73467971V330.33097165a660.18215506 660.18215506 0 0 0 116.59258944-15.99660942 601.1926351 601.1926351 0 0 1 32.71816027 167.5080742h-149.31204423z m93.60547335 284.40358557a606.55590717 606.55590717 0 0 1-93.60547335 143.34551736v-155.75884484c31.93496462 1.6880779 63.17735127 5.93545797 93.60547335 12.41332748z m-93.60547335-72.60676803V542.12778919h149.31204425a601.1926351 601.1926351 0 0 1-32.71945481 167.50807419 662.98483042 662.98483042 0 0 0-116.59258944-15.96683503zM229.34404109 824.42127739a606.97792663 606.97792663 0 0 1 99.72216668-42.51004776 670.24460105 670.24460105 0 0 0 84.35729193 139.7920098 420.30809695 420.30809695 0 0 1-184.07945861-97.31173641z m-43.56380192-45.13019322a419.58574459 419.58574459 0 0 1-94.05856174-237.16329498H272.27740286c3.01239056 63.9903213 14.88330651 125.60128126 34.67679669 183.62637022a668.28725918 668.28725918 0 0 0-121.17266584 53.53692476z m703.53624178 32.17574874A479.71963638 479.71963638 0 0 0 994.03944897 512c0-135.30255107-55.91758064-257.37880462-145.66662447-344.95949685A480.23097901 480.23097901 0 0 0 512 29.96184557a480.23097901 480.23097901 0 0 0-336.3728245 137.07865758c-14.61145347 14.28134622-28.22999609 29.49475998-40.9721363 45.49266394A479.6290187 479.6290187 0 0 0 29.9592565 512c0 266.2062611 215.80341349 482.03944897 482.03944896 482.03944897a480.23097901 480.23097901 0 0 0 336.34305011-137.11102104h0.02977438a484.14825182 484.14825182 0 0 0 40.9721363-45.4913694z" fill="#21A3DD" p-id="2368"></path></svg>

After

Width:  |  Height:  |  Size: 3.8 KiB

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1696841469699" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="11288" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="128"><path d="M254.976 100.8h514.048a154.176 154.176 0 0 1 154.176 154.176v514.048a154.176 154.176 0 0 1-154.176 154.176h-514.048a154.176 154.176 0 0 1-154.176-154.176v-514.048a154.176 154.176 0 0 1 154.176-154.176z" fill="#1693F6" opacity=".3" p-id="11289"></path><path d="M192 128a64 64 0 0 0-64 64v640a64 64 0 0 0 64 64h640a64 64 0 0 0 64-64v-640a64 64 0 0 0-64-64h-640m0-128h640a192 192 0 0 1 192 192v640a192 192 0 0 1-192 192h-640a192 192 0 0 1-192-192v-640a192 192 0 0 1 192-192z" fill="#1693F6" p-id="11290"></path><path d="M320 832m-64 0l0 0q-64 0-64-64l0-512q0-64 64-64l0 0q64 0 64 64l0 512q0 64-64 64Z" fill="#1693F6" p-id="11291"></path></svg>

After

Width:  |  Height:  |  Size: 974 B

View File

@@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1696841493938" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="16677" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="128"><path d="M102.4 332.8L170.666667 264.533333l42.666666 42.666667-68.266666 68.266667 42.666666 42.666666L256 345.6l42.666667 42.666667-68.266667 68.266666 12.8 34.133334L469.333333 268.8 315.733333 102.4c-25.6-21.333333-72.533333-17.066667-93.866666 8.533333L76.8 247.466667c-21.333333 21.333333-12.8 51.2 8.533333 72.533333l17.066667 12.8zM85.333333 896c-4.266667 17.066667 8.533333 29.866667 25.6 25.6l260.266667-81.066667L170.666667 644.266667 85.333333 896zM934.4 302.933333c21.333333-21.333333 21.333333-51.2 0-72.533333L810.666667 110.933333c-21.333333-21.333333-55.466667-21.333333-76.8 0l-89.6 85.333334 200.533333 192 89.6-85.333334zM614.4 226.133333l-413.866667 392.533334L401.066667 810.666667l413.866666-396.8zM947.2 699.733333l-187.733333-174.933333-230.4 217.6 42.666666 42.666667 72.533334-68.266667 42.666666 42.666667-72.533333 68.266666 42.666667 42.666667 72.533333-68.266667 42.666667 42.666667-72.533334 68.266667 12.8 12.8c21.333333 21.333333 55.466667 29.866667 81.066667 8.533333l145.066667-136.533333c21.333333-29.866667 29.866667-76.8 8.533333-98.133334z" p-id="16678" fill="#d4237a"></path></svg>

After

Width:  |  Height:  |  Size: 1.4 KiB

View File

@@ -127,6 +127,8 @@
"Output": "Output",
"Password inconsistency": "Password inconsistency",
"Rename": "Rename",
"Rename Failed": "Rename Failed",
"Rename Success": "Rename Success",
"Search": "Search",
"Status": "Status",
"Update Successful": "Update Successful",
@@ -214,10 +216,14 @@
"Filename": "Filename",
"Files": "{{total}} Files",
"Folder Name": "Input folder name",
"Insert Data": "Insert",
"Manual Data": "Manual Data",
"Manual Input": "Manual Input",
"Manual Mark": "Manual Mark",
"Mark Data": "Mark Data",
"Move Failed": "Move Failed",
"My Dataset": "My Dataset",
"No Folder": "No Folder",
"Other Data": "Other Data",
"Select Dataset": "Select Dataset",
"Select Folder": "Enter folder",
"Upload Time": "Upload Time",

View File

@@ -127,6 +127,8 @@
"Output": "输出",
"Password inconsistency": "两次密码不一致",
"Rename": "重命名",
"Rename Failed": "重命名失败",
"Rename Success": "重命名成功",
"Search": "搜索",
"Status": "状态",
"Update Successful": "更新成功",
@@ -214,10 +216,14 @@
"Filename": "文件名",
"Files": "文件: {{total}}个",
"Folder Name": "输入文件夹名称",
"Insert Data": "插入",
"Manual Data": "手动录入",
"Manual Input": "手动录入",
"Manual Mark": "手动标注",
"Mark Data": "标注数据",
"Move Failed": "移动出现错误~",
"My Dataset": "我的知识库",
"No Folder": "没有子目录了~",
"Other Data": "其他数据",
"Select Dataset": "选择该知识库",
"Select Folder": "进入文件夹",
"Upload Time": "上传时间",

View File

@@ -36,7 +36,8 @@ const ResponseTags = ({ responseData = [] }: { responseData?: ChatHistoryItemRes
quoteList: responseData
.filter((item) => item.moduleType === FlowModuleTypeEnum.chatNode)
.map((item) => item.quoteList)
.flat(),
.flat()
.filter((item) => item) as QuoteItemType[],
historyPreview: chatData?.historyPreview,
runningTime: +responseData.reduce((sum, item) => sum + (item.runningTime || 0), 0).toFixed(2)
};

View File

@@ -63,6 +63,7 @@ import styles from './index.module.scss';
import Script from 'next/script';
import { postQuestionGuide } from '@/api/core/ai/agent/api';
import { splitGuideModule } from './utils';
import { DatasetSpecialIdEnum } from '@fastgpt/core/dataset/constant';
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 24);
@@ -511,6 +512,12 @@ const ChatBox = (
// add guide text listener
useEffect(() => {
const windowMessage = ({ data }: MessageEvent<{ type: 'sendPrompt'; text: string }>) => {
if (data?.type === 'sendPrompt' && data?.text) {
handleSubmit((item) => sendPrompt(item, data.text))();
}
};
window.addEventListener('message', windowMessage);
event.on('guideClick', ({ text }: { text: string }) => {
if (!text) return;
handleSubmit((data) => sendPrompt(data, text))();
@@ -518,6 +525,7 @@ const ChatBox = (
return () => {
event.off('guideClick');
window.removeEventListener('message', windowMessage);
};
}, [handleSubmit, sendPrompt]);
@@ -995,7 +1003,8 @@ const ChatBox = (
defaultValues={{
dataId: adminMarkData.dataId,
q: adminMarkData.q,
a: adminMarkData.a
a: adminMarkData.a,
file_id: DatasetSpecialIdEnum.mark
}}
/>
)}

View File

@@ -3,15 +3,6 @@ export enum UserAuthTypeEnum {
findPassword = 'findPassword'
}
export const fileImgs = [
{ suffix: 'pdf', src: '/imgs/files/pdf.svg' },
{ suffix: 'csv', src: '/imgs/files/csv.svg' },
{ suffix: '(doc|docs)', src: '/imgs/files/doc.svg' },
{ suffix: 'txt', src: '/imgs/files/txt.svg' },
{ suffix: 'md', src: '/imgs/files/markdown.svg' },
{ suffix: '.', src: '/imgs/files/file.svg' }
];
export enum TrackEventName {
windowError = 'windowError',
pageError = 'pageError',

View File

@@ -34,4 +34,3 @@ export const KbTypeMap = {
};
export const FolderAvatarSrc = '/imgs/files/folder.svg';
export const OtherFileId = 'other';

View File

@@ -0,0 +1,109 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { authUser } from '@/service/utils/auth';
import { connectToDatabase } from '@/service/mongo';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
import { DatasetSpecialIdEnum } from '@fastgpt/core/dataset/constant';
import mongoose, { Types } from 'mongoose';
import { delay } from '@/utils/tools';
/**
 * Root-only migration endpoint for v4.4.7 (`authUser` is called with `authRoot: true`).
 *
 * Steps, in order:
 *  1. Widen the `source` / `file_id` columns to VARCHAR(256) and create an index on `file_id`.
 *  2. Read every distinct non-empty `file_id` and, in batches of `limit`, check through `init`
 *     whether a matching document exists in the `dataset.files` collection.
 *  3. Rewrite each `file_id` that has no matching file to `DatasetSpecialIdEnum.manual`.
 *  4. Rewrite every row whose `file_id` is NULL or empty to `DatasetSpecialIdEnum.manual`,
 *     one row per query, spread over interleaved workers.
 *
 * Request body: `{ limit?: number }` — batch size for step 2 and worker count for step 4.
 * Anything that is not a positive integer falls back to 100.
 *
 * Response: `{ empty }`, the number of rows that had no `file_id`; on any failure a 500
 * response carrying the error.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  const initFileIds: string[] = [];
  try {
    // A zero, negative, fractional or non-numeric limit would stall or corrupt the
    // `i += limit` loops below, so normalise it before use.
    const requestedLimit = Number(req.body?.limit ?? 100);
    const limit = Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : 100;

    await connectToDatabase();
    await authUser({ req, authRoot: true });

    console.log('add index');
    // NOTE(review): the table name is hard-coded here while every other query uses
    // PgDatasetTableName — confirm both refer to the same table.
    await PgClient.query(
      `
      ALTER TABLE modeldata
      ALTER COLUMN source TYPE VARCHAR(256),
      ALTER COLUMN file_id TYPE VARCHAR(256);
      CREATE INDEX IF NOT EXISTS modelData_fileId_index ON modeldata (file_id);
      `
    );
    console.log('index success');

    console.log('count rows');
    // Distinct file ids currently referenced by dataset rows.
    const { rows } = await PgClient.query(`SELECT DISTINCT file_id
      FROM ${PgDatasetTableName} WHERE file_id IS NOT NULL AND file_id != '';
    `);
    console.log('count rows success', rows.length);

    console.log('start filter');
    for (let i = 0; i < rows.length; i += limit) {
      // `init` appends every file id without a stored file to `initFileIds`.
      await init(rows.slice(i, i + limit), initFileIds);
      console.log(i);
    }
    console.log('filter success');

    console.log('start update');
    for (let i = 0; i < initFileIds.length; i++) {
      await PgClient.query(`UPDATE ${PgDatasetTableName}
        SET file_id = '${DatasetSpecialIdEnum.manual}'
        WHERE file_id = '${initFileIds[i]}'`);
      console.log('update: ', initFileIds[i]);
    }

    const { rows: emptyIds } = await PgClient.query(
      `SELECT id FROM ${PgDatasetTableName} WHERE file_id IS NULL OR file_id=''`
    );
    console.log(emptyIds.length);

    // NOTE(review): the purpose of this pause is not evident from this file — confirm before removing.
    await delay(5000);

    // Worker `offset` handles rows offset, offset + limit, offset + 2 * limit, ...
    async function updateFrom(offset: number) {
      for (let i = offset; i < emptyIds.length; i += limit) {
        await PgClient.query(`UPDATE ${PgDatasetTableName}
          SET file_id = '${DatasetSpecialIdEnum.manual}'
          WHERE id = '${emptyIds[i].id}'`);
        console.log('update: ', i, emptyIds[i].id);
      }
    }

    // Wait for every worker so that "update success" and the response are only sent once the
    // rows are really updated, and so a failed query reaches the catch block instead of
    // surfacing as an unhandled rejection. Workers beyond the row count would have no work.
    const workerCount = Math.min(limit, emptyIds.length);
    await Promise.all(Array.from({ length: workerCount }, (_, offset) => updateFrom(offset)));

    // Single-statement alternative kept for reference:
    // await PgClient.query(
    //   `UPDATE ${PgDatasetTableName}
    //   SET file_id = '${DatasetSpecialIdEnum.manual}'
    //   WHERE file_id IS NULL OR file_id = ''`
    // );

    console.log('update success');

    jsonRes(res, {
      data: {
        empty: emptyIds.length
      }
    });
  } catch (error) {
    jsonRes(res, {
      code: 500,
      error
    });
  }
}
/**
 * Check one batch of dataset rows against the `dataset.files` collection and collect the
 * file ids that have no stored file.
 *
 * @param rows - Rows carrying a `file_id` field.
 * @param initFileIds - Output list; every `file_id` without a matching file document is
 *                      appended to it so the caller can rewrite those rows to `manual`.
 *
 * Lookups run concurrently and are gathered with `Promise.allSettled`, so a lookup that
 * rejects is ignored and its `file_id` is not collected.
 * NOTE(review): presumably this is what skips ids that are not valid ObjectIds — confirm.
 */
async function init(rows: any[], initFileIds: string[]) {
  const files = mongoose.connection.db.collection(`dataset.files`);

  const lookups = rows.map(async ({ file_id }) => {
    // Is there a stored file for this id?
    const found = await files.findOne({ _id: new Types.ObjectId(file_id) });
    if (found) return '';

    // No file: remember the id so it can be switched to `manual`.
    initFileIds.push(file_id);
    return file_id;
  });

  const settled = await Promise.allSettled(lookups);

  // Log how many ids in this batch turned out to have no file.
  const orphanCount = settled.filter(
    (result) => result.status === 'fulfilled' && result.value
  ).length;
  console.log(orphanCount);
}

View File

@@ -91,6 +91,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
if (res.closed) {
return stream.destroy();
}
q = q.replace(/"/g, '""');
a = a.replace(/"/g, '""');
source = source?.replace(/"/g, '""');
write(`\n"${q}","${a || ''}","${source || ''}"`);
});
// finish

View File

@@ -4,7 +4,6 @@ import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
import { OtherFileId } from '@/constants/dataset';
import type { PgDataItemType } from '@/types/core/dataset/data';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -36,15 +35,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
['user_id', userId],
'AND',
['kb_id', kbId],
...(fileId
? fileId === OtherFileId
? ["AND (file_id IS NULL OR file_id = '')"]
: ['AND', ['file_id', fileId]]
: []),
'AND',
['file_id', fileId],
...(searchText
? [
'AND',
`(q LIKE '%${searchText}%' OR a LIKE '%${searchText}%' OR source LIKE '%${searchText}%')`
`(q ILIKE '%${searchText}%' OR a ILIKE '%${searchText}%' OR source ILIKE '%${searchText}%')`
]
: [])
];

View File

@@ -1,3 +1,7 @@
/*
insert one data to dataset (immediately insert)
manual input or mark data
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
@@ -11,7 +15,6 @@ import { DatasetDataItemType } from '@/types/core/dataset/data';
import { countPromptTokens } from '@/utils/common/tiktoken';
export type Props = {
billId?: string;
kbId: string;
data: DatasetDataItemType;
};
@@ -40,7 +43,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
export async function getVectorAndInsertDataset(
props: Props & { userId: string }
): Promise<string> {
const { kbId, data, userId, billId } = props;
const { kbId, data, userId } = props;
if (!kbId || !data?.q) {
return Promise.reject('缺少参数');
}
@@ -61,7 +64,7 @@ export async function getVectorAndInsertDataset(
const { rows: existsRows } = await PgClient.query(`
SELECT COUNT(*) > 0 AS exists
FROM ${PgDatasetTableName}
WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND user_id='${userId}' AND kb_id='${kbId}'
WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND user_id='${userId}' AND file_id='${data.file_id}' AND kb_id='${kbId}'
`);
const exists = existsRows[0]?.exists || false;
@@ -72,8 +75,7 @@ export async function getVectorAndInsertDataset(
const { vectors } = await getVector({
model: kb.vectorModel,
input: [q],
userId,
billId
userId
});
const response = await insertData2Dataset({

View File

@@ -6,7 +6,7 @@ import { GridFSStorage } from '@/service/lib/gridfs';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
import { Types } from 'mongoose';
import { OtherFileId } from '@/constants/dataset';
import { isSpecialFileId } from '@fastgpt/core/dataset/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -22,14 +22,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const { userId } = await authUser({ req, authToken: true });
// other data. Delete only vector data
if (fileId === OtherFileId) {
if (isSpecialFileId(fileId)) {
await PgClient.delete(PgDatasetTableName, {
where: [
['user_id', userId],
'AND',
['kb_id', kbId],
"AND (file_id IS NULL OR file_id = '')"
]
where: [['user_id', userId], 'AND', ['kb_id', kbId], 'AND', ['file_id', fileId]]
});
} else {
// auth file

View File

@@ -3,8 +3,12 @@ import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { OtherFileId } from '@/constants/dataset';
import { datasetSpecialIdMap } from '@fastgpt/core/dataset/constant';
import { datasetSpecialIds } from '@fastgpt/core/dataset/constant';
import type { GSFileInfoType } from '@/types/common/file';
import { strIsLink } from '@fastgpt/common/tools/str';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -14,12 +18,32 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// 凭证校验
const { userId } = await authUser({ req, authToken: true });
if (fileId === OtherFileId) {
// manual, mark
if (datasetSpecialIds.includes(fileId)) {
return jsonRes<GSFileInfoType>(res, {
data: {
id: OtherFileId,
id: fileId,
size: 0,
filename: 'kb.Other Data',
// @ts-ignore
filename: datasetSpecialIdMap[fileId]?.name || fileId,
uploadDate: new Date(),
encoding: '',
contentType: ''
}
});
}
// link file
if (strIsLink(fileId)) {
const { rows } = await PgClient.select(PgDatasetTableName, {
where: [['user_id', userId], 'AND', ['file_id', fileId]],
limit: 1,
fields: ['source']
});
return jsonRes<GSFileInfoType>(res, {
data: {
id: fileId,
size: 0,
filename: rows[0]?.source || fileId,
uploadDate: new Date(),
encoding: '',
contentType: ''

View File

@@ -5,7 +5,14 @@ import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
import { FileStatusEnum, OtherFileId } from '@/constants/dataset';
import { FileStatusEnum } from '@/constants/dataset';
import { strIsLink } from '@fastgpt/common/tools/str';
import {
DatasetSpecialIdEnum,
datasetSpecialIdMap,
datasetSpecialIds
} from '@fastgpt/core/dataset/constant';
import { Types } from 'mongoose';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -22,57 +29,106 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// 凭证校验
const { userId } = await authUser({ req, authToken: true });
// select and count same file_id data, exclude special id
const pgWhere = `user_id = '${userId}' AND kb_id = '${kbId}' ${datasetSpecialIds
.map((item) => `AND file_id!='${item}'`)
.join(' ')}
${searchText ? `AND source ILIKE '%${searchText}%'` : ''}`;
const [{ rows }, { rowCount: total }] = await Promise.all([
PgClient.query(`SELECT file_id, COUNT(*) AS count
FROM ${PgDatasetTableName}
where ${pgWhere}
GROUP BY file_id
ORDER BY file_id DESC
LIMIT ${pageSize} OFFSET ${(pageNum - 1) * pageSize};
`),
PgClient.query(`SELECT DISTINCT file_id
FROM ${PgDatasetTableName}
where ${pgWhere}
`)
]);
// find files
const gridFs = new GridFSStorage('dataset', userId);
const collection = gridFs.Collection();
const mongoWhere = {
['metadata.kbId']: kbId,
['metadata.userId']: userId,
['metadata.datasetUsed']: true,
...(searchText && { filename: { $regex: searchText } })
async function getSpecialData() {
if (pageNum !== 1) return [];
return [
{
id: DatasetSpecialIdEnum.manual,
size: 0,
filename: datasetSpecialIdMap[DatasetSpecialIdEnum.manual].name,
uploadTime: new Date(),
status: FileStatusEnum.ready,
chunkLength: await PgClient.count(PgDatasetTableName, {
fields: ['id'],
where: [
['user_id', userId],
'AND',
['file_id', DatasetSpecialIdEnum.manual],
'AND',
['kb_id', kbId]
]
})
},
{
id: DatasetSpecialIdEnum.mark,
size: 0,
filename: datasetSpecialIdMap[DatasetSpecialIdEnum.mark].name,
uploadTime: new Date(),
status: FileStatusEnum.ready,
chunkLength: await PgClient.count(PgDatasetTableName, {
fields: ['id'],
where: [
['user_id', userId],
'AND',
['file_id', DatasetSpecialIdEnum.mark],
'AND',
['kb_id', kbId]
]
})
}
];
}
const data = await Promise.all([
getSpecialData(),
...rows.map(async (row) => {
// link data
if (strIsLink(row.file_id)) {
const { rows } = await PgClient.select(PgDatasetTableName, {
where: [['user_id', userId], 'AND', ['file_id', row.file_id]],
limit: 1,
fields: ['source']
});
return {
id: row.file_id,
size: 0,
filename: rows[0]?.source || row.file_id,
uploadTime: new Date(),
status: FileStatusEnum.ready,
chunkLength: row.count
};
const [files, total] = await Promise.all([
collection
.find(mongoWhere, {
}
// file data
const file = await collection.findOne(
{
_id: new Types.ObjectId(row.file_id),
['metadata.userId']: userId,
['metadata.kbId']: kbId
},
{
projection: {
_id: 1,
filename: 1,
uploadDate: 1,
length: 1
}
})
.skip((pageNum - 1) * pageSize)
.limit(pageSize)
.sort({ uploadDate: -1 })
.toArray(),
collection.countDocuments(mongoWhere)
]);
async function GetOtherData() {
return {
id: OtherFileId,
size: 0,
filename: 'kb.Other Data',
uploadTime: new Date(),
status: (await TrainingData.findOne({ userId, kbId, file_id: '' }))
? FileStatusEnum.embedding
: FileStatusEnum.ready,
chunkLength: await PgClient.count(PgDatasetTableName, {
fields: ['id'],
where: [
['user_id', userId],
'AND',
['kb_id', kbId],
"AND (file_id IS NULL OR file_id = '')"
]
})
};
}
const data = await Promise.all([
GetOtherData(),
...files.map(async (file) => {
);
if (!file) return null;
return {
id: String(file._id),
size: file.length,
@@ -81,16 +137,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
status: (await TrainingData.findOne({ userId, kbId, file_id: file._id }))
? FileStatusEnum.embedding
: FileStatusEnum.ready,
chunkLength: await PgClient.count(PgDatasetTableName, {
fields: ['id'],
where: [
['user_id', userId],
'AND',
['kb_id', kbId],
'AND',
['file_id', String(file._id)]
]
})
chunkLength: row.count
};
})
]);
@@ -99,7 +146,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
data: {
pageNum,
pageSize,
data: data.flat(),
data: data.flat().filter((item) => item),
total
}
});

View File

@@ -8,6 +8,7 @@ import { Types } from 'mongoose';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
import { addLog } from '@/service/utils/tools';
import { strIsLink } from '@fastgpt/common/tools/str';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -19,6 +20,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const gridFs = new GridFSStorage('dataset', userId);
const collection = gridFs.Collection();
if (id.length === 24 && !strIsLink(id)) {
await collection.findOneAndUpdate(
{
_id: new Types.ObjectId(id)
@@ -30,9 +32,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
}
}
);
}
// data source
updateDatasetSource({
await updateDatasetSource({
fileId: id,
userId,
name

View File

@@ -1,31 +1,28 @@
import React, { useCallback, useState, useRef, useMemo } from 'react';
import { Box, Card, IconButton, Flex, Grid, Image } from '@chakra-ui/react';
import { Box, Card, IconButton, Flex, Grid, Image, Button } from '@chakra-ui/react';
import type { PgDataItemType } from '@/types/core/dataset/data';
import { usePagination } from '@/hooks/usePagination';
import {
getDatasetDataList,
delOneDatasetDataById,
getTrainingData
} from '@/api/core/dataset/data';
import { getDatasetDataList, delOneDatasetDataById } from '@/api/core/dataset/data';
import { getFileInfoById } from '@/api/core/dataset/file';
import { DeleteIcon, RepeatIcon } from '@chakra-ui/icons';
import { useQuery } from '@tanstack/react-query';
import { useToast } from '@/hooks/useToast';
import InputModal, { FormData as InputDataType } from './InputDataModal';
import InputModal, { FormData as InputDataType, RawFileText } from './InputDataModal';
import { debounce } from 'lodash';
import { getErrText } from '@/utils/tools';
import { useConfirm } from '@/hooks/useConfirm';
import { useTranslation } from 'react-i18next';
import { useRouter } from 'next/router';
import MyIcon from '@/components/Icon';
import MyTooltip from '@/components/MyTooltip';
import MyInput from '@/components/MyInput';
import { fileImgs } from '@/constants/common';
import { useLoading } from '@/hooks/useLoading';
import { getFileIcon, getSpecialFileIcon } from '@fastgpt/common/tools/file';
const DataCard = ({ kbId }: { kbId: string }) => {
const BoxRef = useRef<HTMLDivElement>(null);
const lastSearch = useRef('');
const router = useRouter();
const { Loading, setIsLoading } = useLoading({ defaultLoading: true });
const { fileId = '' } = router.query as { fileId: string };
const { t } = useTranslation();
const [searchText, setSearchText] = useState('');
@@ -37,7 +34,6 @@ const DataCard = ({ kbId }: { kbId: string }) => {
const {
data: kbDataList,
isLoading,
Pagination,
total,
getData,
@@ -52,6 +48,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
fileId
},
onChange() {
setIsLoading(false);
if (BoxRef.current) {
BoxRef.current.scrollTop = 0;
}
@@ -72,9 +69,8 @@ const DataCard = ({ kbId }: { kbId: string }) => {
// get file info
const { data: fileInfo } = useQuery(['getFileInfo', fileId], () => getFileInfoById(fileId));
const fileIcon = useMemo(
() =>
fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(fileInfo?.filename || ''))?.src,
[fileInfo?.filename]
() => getSpecialFileIcon(fileInfo?.id) || getFileIcon(fileInfo?.filename),
[fileInfo?.filename, fileInfo?.id]
);
return (
@@ -82,10 +78,9 @@ const DataCard = ({ kbId }: { kbId: string }) => {
<Flex alignItems={'center'}>
<IconButton
mr={3}
icon={<MyIcon name={'backFill'} w={'18px'} color={'myBlue.600'} />}
icon={<MyIcon name={'backFill'} w={['14px', '18px']} color={'myBlue.600'} />}
bg={'white'}
boxShadow={'1px 1px 9px rgba(0,0,0,0.15)'}
h={'28px'}
size={'sm'}
borderRadius={'50%'}
aria-label={''}
@@ -98,30 +93,34 @@ const DataCard = ({ kbId }: { kbId: string }) => {
})
}
/>
<Flex
className="textEllipsis"
flex={'1 0 0'}
mr={[3, 5]}
fontSize={['sm', 'md']}
alignItems={'center'}
>
<Flex className="textEllipsis" flex={'1 0 0'} mr={[3, 5]} alignItems={'center'}>
<Image src={fileIcon || '/imgs/files/file.svg'} w={'16px'} mr={2} alt={''} />
{t(fileInfo?.filename || 'Filename')}
<RawFileText
filename={fileInfo?.filename}
fileId={fileInfo?.id}
fontSize={['md', 'lg']}
color={'black'}
textDecoration={'none'}
/>
</Flex>
<Box>
<MyTooltip label={'刷新'}>
<IconButton
icon={<RepeatIcon />}
size={['sm', 'md']}
aria-label={'refresh'}
<Button
ml={2}
variant={'base'}
isLoading={isLoading}
size={['sm', 'md']}
onClick={() => {
getData(pageNum);
getTrainingData({ kbId, init: true });
if (!fileInfo) return;
setEditInputData({
dataId: '',
q: '',
a: '',
source: fileInfo.filename,
file_id: fileInfo.id
});
}}
/>
</MyTooltip>
>
{t('kb.Insert Data')}
</Button>
</Box>
</Flex>
<Flex my={3} alignItems={'center'}>
@@ -249,6 +248,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
/>
)}
<ConfirmModal />
<Loading fixed={false} />
</Box>
);
};

View File

@@ -22,16 +22,17 @@ import { useTranslation } from 'react-i18next';
import MyIcon from '@/components/Icon';
import MyInput from '@/components/MyInput';
import dayjs from 'dayjs';
import { fileImgs } from '@/constants/common';
import { useRequest } from '@/hooks/useRequest';
import { useLoading } from '@/hooks/useLoading';
import { FileStatusEnum, OtherFileId } from '@/constants/dataset';
import { FileStatusEnum } from '@/constants/dataset';
import { useRouter } from 'next/router';
import { usePagination } from '@/hooks/usePagination';
import type { DatasetFileItemType } from '@/types/core/dataset/file';
import { useGlobalStore } from '@/store/global';
import MyMenu from '@/components/MyMenu';
import { useEditTitle } from '@/hooks/useEditTitle';
import { datasetSpecialIds } from '@fastgpt/core/dataset/constant';
import { getFileIcon, getSpecialFileIcon } from '@fastgpt/common/tools/file';
const FileCard = ({ kbId }: { kbId: string }) => {
const BoxRef = useRef<HTMLDivElement>(null);
@@ -79,10 +80,14 @@ const FileCard = ({ kbId }: { kbId: string }) => {
// add file icon
const formatFiles = useMemo(
() =>
files.map((file) => ({
files.map((file) => {
const icon = getSpecialFileIcon(file.id) || getFileIcon(file.filename);
return {
...file,
icon: fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(file.filename))?.src
})),
icon
};
}),
[files]
);
@@ -114,8 +119,8 @@ const FileCard = ({ kbId }: { kbId: string }) => {
onSettled() {
setLoading(false);
},
successToast: t('common.Delete Success'),
errorToast: t('common.Delete Failed')
successToast: t('common.Rename Success'),
errorToast: t('common.Rename Failed')
});
const { onOpenModal, EditModal: EditTitleModal } = useEditTitle({
@@ -135,11 +140,15 @@ const FileCard = ({ kbId }: { kbId: string }) => {
// training data
const { data: { qaListLen = 0, vectorListLen = 0 } = {}, refetch: refetchTrainingData } =
useQuery(['getModelSplitDataList', kbId], () => getTrainingData({ kbId, init: false }), {
useQuery(
['getModelSplitDataList', kbId],
() => getTrainingData({ kbId, init: Math.random() > 0.7 }),
{
onError(err) {
console.log(err);
}
});
}
);
useQuery(
['refetchTrainingData', kbId],
@@ -279,7 +288,7 @@ const FileCard = ({ kbId }: { kbId: string }) => {
</MenuButton>
}
menuList={[
...(file.id !== OtherFileId
...(!datasetSpecialIds.includes(file.id)
? [
{
child: (

View File

@@ -3,13 +3,14 @@ import { useTranslation } from 'next-i18next';
import MyModal from '@/components/MyModal';
import { Box, Input, Textarea, ModalBody, ModalFooter, Button } from '@chakra-ui/react';
import { useForm } from 'react-hook-form';
import { useRequest } from '@/hooks/useRequest';
const CreateFileModal = ({
onClose,
onSuccess
}: {
onClose: () => void;
onSuccess: (e: { filename: string; content: string }) => void;
onSuccess: (e: { filename: string; content: string }) => Promise<void>;
}) => {
const { t } = useTranslation();
const { register, handleSubmit } = useForm({
@@ -19,6 +20,13 @@ const CreateFileModal = ({
}
});
const { mutate, isLoading } = useRequest({
mutationFn: () => handleSubmit(onSuccess)(),
onSuccess: () => {
onClose();
}
});
return (
<MyModal title={t('file.Create File')} isOpen w={'600px'} top={'15vh'}>
<ModalBody>
@@ -47,12 +55,7 @@ const CreateFileModal = ({
<Button variant={'base'} mr={4} onClick={onClose}>
</Button>
<Button
onClick={() => {
handleSubmit(onSuccess)();
onClose();
}}
>
<Button isLoading={isLoading} onClick={mutate}>
</Button>
</ModalFooter>

View File

@@ -12,7 +12,6 @@ import {
readDocContent
} from '@/utils/web/file';
import { Box, Flex, useDisclosure, type BoxProps } from '@chakra-ui/react';
import { fileImgs } from '@/constants/common';
import { DragEvent, useCallback, useState } from 'react';
import { useTranslation } from 'next-i18next';
import { customAlphabet } from 'nanoid';
@@ -22,12 +21,13 @@ import { FetchResultItem } from '@/types/plugin';
import type { DatasetDataItemType } from '@/types/core/dataset/data';
import { getErrText } from '@/utils/tools';
import { useDatasetStore } from '@/store/dataset';
import { getFileIcon } from '@fastgpt/common/tools/file';
const UrlFetchModal = dynamic(() => import('./UrlFetchModal'));
const CreateFileModal = dynamic(() => import('./CreateFileModal'));
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
const csvTemplate = `index,content,source\n"被索引的内容","对应的答案。CSV 中请注意内容不能包含双引号,双引号是列分割符号","来源,可选。"\n"什么是 laf","laf 是一个云函数开发平台……",""\n"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……",""`;
/**
 * Sample CSV offered to users as an import template.
 *
 * The header declares exactly two columns (`index`, `content`), so every
 * sample row must contain exactly two quoted fields. A trailing empty field
 * left over from the former third column has been removed from the "laf" row
 * so the template is rectangular.
 */
const csvTemplate = `index,content\n"被索引的内容","对应的答案。CSV 中请注意内容不能包含双引号,双引号是列分割符号"\n"什么是 laf","laf 是一个云函数开发平台……"\n"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……"`;
export type FileItemType = {
id: string;
@@ -63,7 +63,7 @@ const FileSelect = ({
const { toast } = useToast();
const { File, onOpen } = useSelectFile({
const { File: FileSelector, onOpen } = useSelectFile({
fileType: fileExtension,
multiple: true
});
@@ -92,11 +92,9 @@ const FileSelect = ({
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
/* text file */
const icon = fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(file.name))?.src;
const icon = getFileIcon(file?.name);
if (!icon) {
continue;
}
if (!icon) continue;
// parse and upload files
let [text, filesId] = await Promise.all([
@@ -165,7 +163,7 @@ const FileSelect = ({
.map((item) => ({
q: item[0] || '',
a: item[1] || '',
source: item[2] || file.name || '',
source: file.name || '',
file_id: filesId[0]
}))
};
@@ -201,7 +199,8 @@ const FileSelect = ({
chunks: splitRes.chunks.map((chunk) => ({
q: chunk,
a: '',
source: url
source: url,
file_id: url
}))
};
});
@@ -210,15 +209,25 @@ const FileSelect = ({
[chunkLen, onPushFiles]
);
const onCreateFile = useCallback(
({ filename, content }: { filename: string; content: string }) => {
async ({ filename, content }: { filename: string; content: string }) => {
content = simpleText(content);
// create virtual txt file
const txtBlob = new Blob([content], { type: 'text/plain' });
const txtFile = new File([txtBlob], `${filename}.txt`, {
type: txtBlob.type,
lastModified: new Date().getTime()
});
const fileIds = await uploadFiles([txtFile], { kbId: kbDetail._id });
const splitRes = splitText2Chunks({
text: content,
maxLen: chunkLen
});
onPushFiles([
{
id: nanoid(),
id: fileIds[0],
filename,
icon: '/imgs/files/txt.svg',
text: content,
@@ -226,12 +235,13 @@ const FileSelect = ({
chunks: splitRes.chunks.map((chunk) => ({
q: chunk,
a: '',
source: filename
source: filename,
file_id: fileIds[0]
}))
}
]);
},
[chunkLen, onPushFiles]
[chunkLen, kbDetail._id, onPushFiles]
);
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
@@ -383,7 +393,7 @@ const FileSelect = ({
{selectingText !== undefined && (
<FileSelectLoading loading text={selectingText} fixed={false} />
)}
<File onSelect={onSelectFile} />
<FileSelector onSelect={onSelectFile} />
{isOpenUrlFetch && <UrlFetchModal onClose={onCloseUrlFetch} onSuccess={onUrlFetch} />}
{isOpenCreateFile && <CreateFileModal onClose={onCloseCreateFile} onSuccess={onCreateFile} />}
</Box>

View File

@@ -4,11 +4,11 @@ import { useForm } from 'react-hook-form';
import { useToast } from '@/hooks/useToast';
import { useRequest } from '@/hooks/useRequest';
import { getErrText } from '@/utils/tools';
import { postChunks2Dataset } from '@/api/core/dataset/data';
import { TrainingModeEnum } from '@/constants/plugin';
import { postData2Dataset } from '@/api/core/dataset/data';
import MyTooltip from '@/components/MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { useDatasetStore } from '@/store/dataset';
import { DatasetSpecialIdEnum, datasetSpecialIdMap } from '@fastgpt/core/dataset/constant';
type ManualFormType = { q: string; a: string };
@@ -33,23 +33,16 @@ const ManualImport = ({ kbId }: { kbId: string }) => {
}
try {
const data = {
await postData2Dataset({
kbId,
data: {
a: e.a,
q: e.q,
source: '手动录入'
};
const { insertLen } = await postChunks2Dataset({
kbId,
mode: TrainingModeEnum.index,
data: [data]
source: datasetSpecialIdMap[DatasetSpecialIdEnum.manual]?.sourceName,
file_id: DatasetSpecialIdEnum.manual
}
});
if (insertLen === 0) {
toast({
title: '已存在完全一致的数据',
status: 'warning'
});
} else {
toast({
title: '导入数据成功,需要一段时间训练',
status: 'success'
@@ -58,7 +51,6 @@ const ManualImport = ({ kbId }: { kbId: string }) => {
a: '',
q: ''
});
}
} catch (err: any) {
toast({
title: getErrText(err, '出现了点意外~'),

View File

@@ -1,4 +1,4 @@
import React, { useState, useCallback } from 'react';
import React, { useState, useCallback, useMemo } from 'react';
import { Box, Flex, Button, Textarea, IconButton, BoxProps } from '@chakra-ui/react';
import { useForm } from 'react-hook-form';
import {
@@ -17,6 +17,9 @@ import { DatasetDataItemType } from '@/types/core/dataset/data';
import { useTranslation } from 'react-i18next';
import { useDatasetStore } from '@/store/dataset';
import { getFileAndOpen } from '@/utils/web/file';
import { datasetSpecialIdMap, datasetSpecialIds } from '@fastgpt/core/dataset/constant';
import { strIsLink } from '@fastgpt/common/tools/str';
import { useGlobalStore } from '@/store/global';
export type FormData = { dataId?: string } & DatasetDataItemType;
@@ -25,16 +28,13 @@ const InputDataModal = ({
onSuccess,
onDelete,
kbId,
defaultValues = {
a: '',
q: ''
}
defaultValues
}: {
onClose: () => void;
onSuccess: (data: FormData) => void;
onDelete?: () => void;
kbId: string;
defaultValues?: FormData;
defaultValues: FormData;
}) => {
const { t } = useTranslation();
const [loading, setLoading] = useState(false);
@@ -64,10 +64,10 @@ const InputDataModal = ({
try {
const data = {
...e,
dataId: '',
a: e.a,
q: e.q,
source: '手动录入'
// @ts-ignore
source: e.source || datasetSpecialIdMap[e.file_id]?.sourceName
};
data.dataId = await postData2Dataset({
kbId,
@@ -79,6 +79,7 @@ const InputDataModal = ({
status: 'success'
});
reset({
...e,
a: '',
q: ''
});
@@ -103,9 +104,9 @@ const InputDataModal = ({
setLoading(true);
try {
const data = {
...e,
dataId: e.dataId,
kbId,
a: e.a,
q: e.q === defaultValues.q ? '' : e.q
};
await putDatasetDataById(data);
@@ -259,31 +260,40 @@ interface RawFileTextProps extends BoxProps {
export function RawFileText({ fileId, filename = '', ...props }: RawFileTextProps) {
const { t } = useTranslation();
const { toast } = useToast();
const { setLoading } = useGlobalStore();
const hasFile = useMemo(() => fileId && !datasetSpecialIds.includes(fileId), [fileId]);
return (
<MyTooltip label={fileId ? t('file.Click to view file') || '' : ''} shouldWrapChildren={false}>
<MyTooltip label={hasFile ? t('file.Click to view file') || '' : ''} shouldWrapChildren={false}>
<Box
color={'myGray.600'}
display={'inline-block'}
whiteSpace={'nowrap'}
{...(!!fileId
{...(hasFile
? {
cursor: 'pointer',
textDecoration: 'underline',
onClick: async () => {
if (strIsLink(fileId)) {
return window.open(fileId, '_blank');
}
setLoading(true);
try {
await getFileAndOpen(fileId);
await getFileAndOpen(fileId as string);
} catch (error) {
toast({
title: getErrText(error, '获取文件地址失败'),
status: 'error'
});
}
setLoading(false);
}
}
: {})}
{...props}
>
{filename}
{t(filename)}
</Box>
</MyTooltip>
);

View File

@@ -179,8 +179,8 @@ export const insertData2Dataset = ({
values: data.map((item) => [
{ key: 'user_id', value: userId },
{ key: 'kb_id', value: kbId },
{ key: 'source', value: item.source?.slice(0, 60)?.trim() || '' },
{ key: 'file_id', value: item.file_id || '' },
{ key: 'source', value: item.source?.slice(0, 200)?.trim() || '' },
{ key: 'file_id', value: item.file_id?.slice(0, 200)?.trim() || '' },
{ key: 'q', value: item.q.replace(/'/g, '"') },
{ key: 'a', value: item.a.replace(/'/g, '"') },
{ key: 'vector', value: `[${item.vector}]` }
@@ -198,13 +198,14 @@ export async function initPg() {
vector VECTOR(1536) NOT NULL,
user_id VARCHAR(50) NOT NULL,
kb_id VARCHAR(50),
source VARCHAR(100),
file_id VARCHAR(100),
source VARCHAR(256),
file_id VARCHAR(256),
q TEXT NOT NULL,
a TEXT
);
CREATE INDEX IF NOT EXISTS modelData_userId_index ON ${PgDatasetTableName} USING HASH (user_id);
CREATE INDEX IF NOT EXISTS modelData_kbId_index ON ${PgDatasetTableName} USING HASH (kb_id);
CREATE INDEX IF NOT EXISTS modelData_kb_id_index ON ${PgDatasetTableName} (kb_id);
CREATE INDEX IF NOT EXISTS modelData_fileId_index ON ${PgDatasetTableName} (file_id);
CREATE INDEX IF NOT EXISTS idx_model_data_md5_q_a_user_id_kb_id ON ${PgDatasetTableName} (md5(q), md5(a), user_id, kb_id);
`);
console.log('init pg successful');

View File

@@ -2,9 +2,9 @@ export type DatasetDataItemType = {
q: string; // 提问词
a: string; // 原文
source?: string;
file_id?: string;
file_id: string;
};
export type PgDataItemType = DatasetItemType & {
export type PgDataItemType = DatasetDataItemType & {
id: string;
};

4
shdemo.sh Normal file
View File

@@ -0,0 +1,4 @@
# Build image
docker build -t registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.4.7 --build-arg name=app .