mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-21 11:43:56 +00:00
Optimize the file storage structure of the knowledge base (#386)
This commit is contained in:
2
.vscode/settings.json
vendored
2
.vscode/settings.json
vendored
@@ -2,7 +2,7 @@
|
||||
"editor.formatOnSave": true,
|
||||
"editor.mouseWheelZoom": true,
|
||||
"typescript.tsdk": "node_modules/typescript/lib",
|
||||
"editor.defaultFormatter": "esbenp.prettier-vscode",
|
||||
"prettier.prettierPath": "./node_modules/prettier",
|
||||
"i18n-ally.localesPaths": [
|
||||
"projects/app/public/locales"
|
||||
],
|
||||
|
@@ -3,7 +3,7 @@
|
||||
## 本地运行
|
||||
|
||||
1. 安装 go 语言环境。
|
||||
2. 安装 hugo。 [二进制下载](https://github.com/gohugoio/hugo/releases/tag/v0.117.0)
|
||||
2. 安装 hugo。 [二进制下载](https://github.com/gohugoio/hugo/releases/tag/v0.117.0),注意需要安装 extended 版本。
|
||||
3. cd docSite
|
||||
4. hugo serve
|
||||
5. 访问 http://localhost:1313
|
||||
|
@@ -84,6 +84,14 @@ weight: 520
|
||||
"maxToken": 16000,
|
||||
"price": 0,
|
||||
"prompt": ""
|
||||
},
|
||||
"QGModel": { // 生成下一步指引模型
|
||||
"model": "gpt-3.5-turbo",
|
||||
"name": "GPT35-4k",
|
||||
"maxToken": 4000,
|
||||
"price": 0,
|
||||
"prompt": "",
|
||||
"functionCall": false
|
||||
}
|
||||
}
|
||||
```
|
||||
|
8
docSite/content/docs/development/design/_index.md
Normal file
8
docSite/content/docs/development/design/_index.md
Normal file
@@ -0,0 +1,8 @@
|
||||
---
|
||||
weight: 540
|
||||
title: "设计方案"
|
||||
description: "FastGPT 部分设计方案"
|
||||
icon: public
|
||||
draft: false
|
||||
images: []
|
||||
---
|
25
docSite/content/docs/development/design/dataset.md
Normal file
25
docSite/content/docs/development/design/dataset.md
Normal file
@@ -0,0 +1,25 @@
|
||||
---
|
||||
weight: 541
|
||||
title: "数据集"
|
||||
description: "FastGPT 数据集中文件与数据的设计方案"
|
||||
icon: dataset
|
||||
draft: false
|
||||
images: []
|
||||
---
|
||||
|
||||
## 文件与数据的关系
|
||||
|
||||
在 FastGPT 中,文件会通过 MongoDB 的 FS 存储,而具体的数据会通过 PostgreSQL 存储,PG 中的数据会有一列 file_id,关联对应的文件。考虑到旧版本的兼容,以及手动输入、标注数据等,我们给 file_id 增加了一些特殊的值,如下:
|
||||
|
||||
- manual: 手动输入
|
||||
- mark: 手动标注的数据
|
||||
|
||||
注意,file_id 仅在插入数据时会写入,变更时无法修改。
|
||||
|
||||
## 文件导入流程
|
||||
|
||||
1. 上传文件到 MongoDB 的 FS 中,获取 file_id,此时文件标记为 `unused` 状态
|
||||
2. 浏览器解析文件,获取对应的文本和 chunk
|
||||
3. 给每个 chunk 打上 file_id
|
||||
4. 点击上传数据:将文件的状态改为 `used`,并将数据推送到 mongo `training` 表中等待训练
|
||||
5. 由训练线程从 mongo 中取数据,并在获取向量后插入到 pg。
|
29
docSite/content/docs/installation/upgrading/447.md
Normal file
29
docSite/content/docs/installation/upgrading/447.md
Normal file
@@ -0,0 +1,29 @@
|
||||
---
|
||||
title: 'V4.4.7'
|
||||
description: 'FastGPT V4.4.7 更新(需执行升级脚本)'
|
||||
icon: 'upgrade'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 840
|
||||
---
|
||||
|
||||
## 执行初始化 API
|
||||
|
||||
发起 1 个 HTTP 请求({{rootkey}} 替换成环境变量里的`rootkey`,{{host}}替换成自己域名)
|
||||
|
||||
1. https://xxxxx/api/admin/initv447
|
||||
|
||||
```bash
|
||||
curl --location --request POST 'https://{{host}}/api/admin/initv447' \
|
||||
--header 'rootkey: {{rootkey}}' \
|
||||
--header 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
初始化 pg 索引以及将 file_id 中空对象转成 manual 对象。如果数据多,可能需要较长时间,可以通过日志查看进度。
|
||||
|
||||
## 功能介绍
|
||||
|
||||
### FastGPT V4.4.7
|
||||
|
||||
1. 优化了数据库文件 crud。
|
||||
2. 兼容链接读取,作为 source。
|
23
packages/common/tools/file.ts
Normal file
23
packages/common/tools/file.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
import { strIsLink } from './str';
|
||||
|
||||
/**
 * Icon lookup table for uploaded files.
 *
 * `suffix` is a regular-expression source string (not a plain extension), and
 * the entries are ordered: the final `'.'` entry matches any non-empty name and
 * acts as the generic fallback icon.
 */
export const fileImgs = [
  { suffix: 'pdf', src: '/imgs/files/pdf.svg' },
  { suffix: 'csv', src: '/imgs/files/csv.svg' },
  { suffix: '(doc|docs)', src: '/imgs/files/doc.svg' },
  { suffix: 'txt', src: '/imgs/files/txt.svg' },
  { suffix: 'md', src: '/imgs/files/markdown.svg' },
  { suffix: '.', src: '/imgs/files/file.svg' }
];

/**
 * Resolve the icon path for a file name.
 *
 * The name is first matched on its trailing extension (`.pdf`, `.csv`, ...), so
 * a name such as `my-pdf-notes.txt` gets the txt icon instead of the pdf icon.
 * If no extension matches, the legacy behaviour is kept: the first entry whose
 * pattern occurs anywhere in the name wins (this is what maps `x.docx` to the
 * doc icon and any other non-empty name to the generic file icon).
 *
 * @param name File name; defaults to an empty string.
 * @returns The icon path, or `undefined` when `name` is empty.
 */
export function getFileIcon(name = ''): string | undefined {
  // Pass 1: case-insensitive match anchored on the extension at the end of the name.
  const byExtension = fileImgs.find((item) => new RegExp(`\\.${item.suffix}$`, 'i').test(name));
  if (byExtension) return byExtension.src;

  // Pass 2: legacy unanchored match, first hit in table order.
  return fileImgs.find((item) => new RegExp(item.suffix, 'i').test(name))?.src;
}
|
||||
/**
 * Resolve the icon for a "special" dataset source that is not an uploaded file.
 *
 * @param name The literal `'manual'` or `'mark'`, or a string that `strIsLink`
 *   accepts as a link; defaults to an empty string.
 * @returns The matching icon path, or `undefined` when `name` is none of these.
 */
export function getSpecialFileIcon(name = '') {
  switch (name) {
    case 'manual':
      return '/imgs/files/manual.svg';
    case 'mark':
      return '/imgs/files/mark.svg';
    default:
      // Anything else only gets an icon when it looks like a link.
      return strIsLink(name) ? '/imgs/files/link.svg' : undefined;
  }
}
|
5
packages/common/tools/str.ts
Normal file
5
packages/common/tools/str.ts
Normal file
@@ -0,0 +1,5 @@
|
||||
/**
 * Tell whether a string looks like a link.
 *
 * A value is accepted when it starts with `http://`, `https://`, `://`,
 * `www.` or `/`, is followed by at least two further characters and contains
 * no whitespace after them.
 *
 * @param str Candidate string; `undefined` and `''` are never links.
 * @returns `true` when the whole string matches the link pattern.
 */
export function strIsLink(str?: string) {
  return !!str && /^((http|https)?:\/\/|www\.|\/)[^\s/$.?#].[^\s]*$/i.test(str);
}
|
15
packages/core/dataset/constant.ts
Normal file
15
packages/core/dataset/constant.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
/**
 * Reserved `file_id` values for dataset rows that do not belong to an
 * uploaded file.
 */
export enum DatasetSpecialIdEnum {
  // Data typed in by hand.
  manual = 'manual',
  // Data created by manual marking.
  mark = 'mark'
}

/**
 * Display metadata for each special id. The values look like i18n keys in the
 * `kb` namespace (NOTE(review): confirm against the locale files).
 */
export const datasetSpecialIdMap = {
  [DatasetSpecialIdEnum.manual]: { name: 'kb.Manual Data', sourceName: 'kb.Manual Input' },
  [DatasetSpecialIdEnum.mark]: { name: 'kb.Mark Data', sourceName: 'kb.Manual Mark' }
};

/** All special ids as plain strings, in enum declaration order. */
export const datasetSpecialIds: string[] = Object.values(DatasetSpecialIdEnum);
|
8
packages/core/dataset/utils.ts
Normal file
8
packages/core/dataset/utils.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import { datasetSpecialIds } from './constant';
|
||||
import { strIsLink } from '@fastgpt/common/tools/str';
|
||||
|
||||
/**
 * Tell whether a `file_id` is a special id rather than a stored file's id.
 *
 * @param id The `file_id` to classify.
 * @returns `true` when `id` is listed in `datasetSpecialIds` or is accepted by
 *   `strIsLink`; `false` otherwise.
 */
export function isSpecialFileId(id: string) {
  return datasetSpecialIds.includes(id) || strIsLink(id);
}
|
@@ -3,7 +3,9 @@
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"openai": "^3.3.0",
|
||||
"tunnel": "^0.0.6"
|
||||
"tunnel": "^0.0.6",
|
||||
"@fastgpt/common": "workspace:*",
|
||||
"@fastgpt/support": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/tunnel": "^0.0.4"
|
||||
|
@@ -1,4 +1,7 @@
|
||||
{
|
||||
"name": "@fastgpt/support",
|
||||
"version": "1.0.0"
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"@fastgpt/common": "workspace:*"
|
||||
}
|
||||
}
|
||||
|
12
pnpm-lock.yaml
generated
12
pnpm-lock.yaml
generated
@@ -31,6 +31,12 @@ importers:
|
||||
|
||||
packages/core:
|
||||
dependencies:
|
||||
'@fastgpt/common':
|
||||
specifier: workspace:*
|
||||
version: link:../common
|
||||
'@fastgpt/support':
|
||||
specifier: workspace:*
|
||||
version: link:../support
|
||||
openai:
|
||||
specifier: ^3.3.0
|
||||
version: registry.npmmirror.com/openai@3.3.0
|
||||
@@ -42,7 +48,11 @@ importers:
|
||||
specifier: ^0.0.4
|
||||
version: registry.npmmirror.com/@types/tunnel@0.0.4
|
||||
|
||||
packages/support: {}
|
||||
packages/support:
|
||||
dependencies:
|
||||
'@fastgpt/common':
|
||||
specifier: workspace:*
|
||||
version: link:../common
|
||||
|
||||
projects/app:
|
||||
dependencies:
|
||||
|
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "app",
|
||||
"version": "4.4.6",
|
||||
"version": "4.4.7",
|
||||
"private": false,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
@@ -9,6 +9,6 @@
|
||||
- [计费规则](https://doc.fastgpt.run/docs/pricing/)
|
||||
|
||||
**其他问题**
|
||||
| 交流群 | 小助手 |
|
||||
| ----------------------- | -------------------- |
|
||||
|  |  |
|
||||
| 添加小助手进入交流群 |
|
||||
| ----------------------- |
|
||||
|  |
|
||||
|
1
projects/app/public/imgs/files/link.svg
Normal file
1
projects/app/public/imgs/files/link.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1696841646763" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="2367" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="128"><path d="M185.78023917 244.70891583a668.28725918 668.28725918 0 0 0 121.17266584 53.53692476 665.15318203 665.15318203 0 0 0-34.64702231 183.62637022H91.72167743a419.67636227 419.67636227 0 0 1 94.05856174-237.16329498z m227.58241722-142.44192964a672.20323745 672.20323745 0 0 0-84.326223 139.79071526 609.56829772 609.56829772 0 0 1-99.6923923-42.47897884 420.36764571 420.36764571 0 0 1 184.0186153-97.31173642z m381.2622336 97.31044188a607.36887719 607.36887719 0 0 1-99.69109776 42.48027338 672.20323745 672.20323745 0 0 0-84.326223-139.79071526c69.74454391 16.75132524 132.56071902 50.73424941 184.0186153 97.31044188z m43.56509646 45.13148776a419.67636227 419.67636227 0 0 1 94.08704158 237.16329498h-180.52336198a664.06835895 664.06835895 0 0 0-34.70657106-183.62637022 668.28725918 668.28725918 0 0 0 121.17266584-53.53692476z m0 534.58216834a668.28725918 668.28725918 0 0 0-121.17266583-53.53692476 663.64763402 663.64763402 0 0 0 34.70657106-183.62637022h180.52336197a419.58574459 419.58574459 0 0 1-94.0572672 237.16329498zM694.93379223 781.91122963c34.70657106 11.23788674 68.02798617 25.5179384 99.72216668 42.48027339a420.21747927 420.21747927 0 0 1-184.04838968 97.31044187A671.42004179 671.42004179 0 0 0 694.96356661 781.91122963z m-213.06158142-28.07853511v155.75884484a607.60966131 607.60966131 0 0 1-93.63524773-143.31574298 605.26136889 605.26136889 0 0 1 93.63524773-12.44310186z m0-211.70490533v151.54123916a662.9537615 662.9537615 0 0 0-116.59258944 15.96683503 600.07803765 600.07803765 0 0 1-32.74922919-167.50807419h149.34181863z m-93.63524773-284.43335995a608.06145517 608.06145517 0 0 1 93.63524773-143.31574298v155.75884484a594.47398147 
594.47398147 0 0 1-93.63524773-12.44310186z m93.63524773 72.60676804v151.57101353h-149.31204425c3.0136851-58.44710843 14.19072853-114.63395303 32.68968043-167.50807419 37.748736 8.6759955 76.70398167 14.10011085 116.62236382 15.96683503z m60.25557838-60.16366618v-155.75884484a607.00770101 607.00770101 0 0 1 93.60547335 143.31574298c-30.42812208 6.50764389-61.67180325 10.78609289-93.60547335 12.44310186z m0 211.73467971V330.33097165a660.18215506 660.18215506 0 0 0 116.59258944-15.99660942 601.1926351 601.1926351 0 0 1 32.71816027 167.5080742h-149.31204423z m93.60547335 284.40358557a606.55590717 606.55590717 0 0 1-93.60547335 143.34551736v-155.75884484c31.93496462 1.6880779 63.17735127 5.93545797 93.60547335 12.41332748z m-93.60547335-72.60676803V542.12778919h149.31204425a601.1926351 601.1926351 0 0 1-32.71945481 167.50807419 662.98483042 662.98483042 0 0 0-116.59258944-15.96683503zM229.34404109 824.42127739a606.97792663 606.97792663 0 0 1 99.72216668-42.51004776 670.24460105 670.24460105 0 0 0 84.35729193 139.7920098 420.30809695 420.30809695 0 0 1-184.07945861-97.31173641z m-43.56380192-45.13019322a419.58574459 419.58574459 0 0 1-94.05856174-237.16329498H272.27740286c3.01239056 63.9903213 14.88330651 125.60128126 34.67679669 183.62637022a668.28725918 668.28725918 0 0 0-121.17266584 53.53692476z m703.53624178 32.17574874A479.71963638 479.71963638 0 0 0 994.03944897 512c0-135.30255107-55.91758064-257.37880462-145.66662447-344.95949685A480.23097901 480.23097901 0 0 0 512 29.96184557a480.23097901 480.23097901 0 0 0-336.3728245 137.07865758c-14.61145347 14.28134622-28.22999609 29.49475998-40.9721363 45.49266394A479.6290187 479.6290187 0 0 0 29.9592565 512c0 266.2062611 215.80341349 482.03944897 482.03944896 482.03944897a480.23097901 480.23097901 0 0 0 336.34305011-137.11102104h0.02977438a484.14825182 484.14825182 0 0 0 40.9721363-45.4913694z" fill="#21A3DD" p-id="2368"></path></svg>
|
After Width: | Height: | Size: 3.8 KiB |
1
projects/app/public/imgs/files/manual.svg
Normal file
1
projects/app/public/imgs/files/manual.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1696841469699" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="11288" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="128"><path d="M254.976 100.8h514.048a154.176 154.176 0 0 1 154.176 154.176v514.048a154.176 154.176 0 0 1-154.176 154.176h-514.048a154.176 154.176 0 0 1-154.176-154.176v-514.048a154.176 154.176 0 0 1 154.176-154.176z" fill="#1693F6" opacity=".3" p-id="11289"></path><path d="M192 128a64 64 0 0 0-64 64v640a64 64 0 0 0 64 64h640a64 64 0 0 0 64-64v-640a64 64 0 0 0-64-64h-640m0-128h640a192 192 0 0 1 192 192v640a192 192 0 0 1-192 192h-640a192 192 0 0 1-192-192v-640a192 192 0 0 1 192-192z" fill="#1693F6" p-id="11290"></path><path d="M320 832m-64 0l0 0q-64 0-64-64l0-512q0-64 64-64l0 0q64 0 64 64l0 512q0 64-64 64Z" fill="#1693F6" p-id="11291"></path></svg>
|
After Width: | Height: | Size: 974 B |
1
projects/app/public/imgs/files/mark.svg
Normal file
1
projects/app/public/imgs/files/mark.svg
Normal file
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1696841493938" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="16677" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="128"><path d="M102.4 332.8L170.666667 264.533333l42.666666 42.666667-68.266666 68.266667 42.666666 42.666666L256 345.6l42.666667 42.666667-68.266667 68.266666 12.8 34.133334L469.333333 268.8 315.733333 102.4c-25.6-21.333333-72.533333-17.066667-93.866666 8.533333L76.8 247.466667c-21.333333 21.333333-12.8 51.2 8.533333 72.533333l17.066667 12.8zM85.333333 896c-4.266667 17.066667 8.533333 29.866667 25.6 25.6l260.266667-81.066667L170.666667 644.266667 85.333333 896zM934.4 302.933333c21.333333-21.333333 21.333333-51.2 0-72.533333L810.666667 110.933333c-21.333333-21.333333-55.466667-21.333333-76.8 0l-89.6 85.333334 200.533333 192 89.6-85.333334zM614.4 226.133333l-413.866667 392.533334L401.066667 810.666667l413.866666-396.8zM947.2 699.733333l-187.733333-174.933333-230.4 217.6 42.666666 42.666667 72.533334-68.266667 42.666666 42.666667-72.533333 68.266666 42.666667 42.666667 72.533333-68.266667 42.666667 42.666667-72.533334 68.266667 12.8 12.8c21.333333 21.333333 55.466667 29.866667 81.066667 8.533333l145.066667-136.533333c21.333333-29.866667 29.866667-76.8 8.533333-98.133334z" p-id="16678" fill="#d4237a"></path></svg>
|
After Width: | Height: | Size: 1.4 KiB |
@@ -127,6 +127,8 @@
|
||||
"Output": "Output",
|
||||
"Password inconsistency": "Password inconsistency",
|
||||
"Rename": "Rename",
|
||||
"Rename Failed": "Rename Failed",
|
||||
"Rename Success": "Rename Success",
|
||||
"Search": "Search",
|
||||
"Status": "Status",
|
||||
"Update Successful": "Update Successful",
|
||||
@@ -214,10 +216,14 @@
|
||||
"Filename": "Filename",
|
||||
"Files": "{{total}} Files",
|
||||
"Folder Name": "Input folder name",
|
||||
"Insert Data": "Insert",
|
||||
"Manual Data": "Manual Data",
|
||||
"Manual Input": "Manual Input",
|
||||
"Manual Mark": "Manual Mark",
|
||||
"Mark Data": "Mark Data",
|
||||
"Move Failed": "Move Failed",
|
||||
"My Dataset": "My Dataset",
|
||||
"No Folder": "No Folder",
|
||||
"Other Data": "Other Data",
|
||||
"Select Dataset": "Select Dataset",
|
||||
"Select Folder": "Enter folder",
|
||||
"Upload Time": "Upload Time",
|
||||
|
@@ -127,6 +127,8 @@
|
||||
"Output": "输出",
|
||||
"Password inconsistency": "两次密码不一致",
|
||||
"Rename": "重命名",
|
||||
"Rename Failed": "重命名失败",
|
||||
"Rename Success": "重命名成功",
|
||||
"Search": "搜索",
|
||||
"Status": "状态",
|
||||
"Update Successful": "更新成功",
|
||||
@@ -214,10 +216,14 @@
|
||||
"Filename": "文件名",
|
||||
"Files": "文件: {{total}}个",
|
||||
"Folder Name": "输入文件夹名称",
|
||||
"Insert Data": "插入",
|
||||
"Manual Data": "手动录入",
|
||||
"Manual Input": "手动录入",
|
||||
"Manual Mark": "手动标注",
|
||||
"Mark Data": "标注数据",
|
||||
"Move Failed": "移动出现错误~",
|
||||
"My Dataset": "我的知识库",
|
||||
"No Folder": "没有子目录了~",
|
||||
"Other Data": "其他数据",
|
||||
"Select Dataset": "选择该知识库",
|
||||
"Select Folder": "进入文件夹",
|
||||
"Upload Time": "上传时间",
|
||||
|
@@ -36,7 +36,8 @@ const ResponseTags = ({ responseData = [] }: { responseData?: ChatHistoryItemRes
|
||||
quoteList: responseData
|
||||
.filter((item) => item.moduleType === FlowModuleTypeEnum.chatNode)
|
||||
.map((item) => item.quoteList)
|
||||
.flat(),
|
||||
.flat()
|
||||
.filter((item) => item) as QuoteItemType[],
|
||||
historyPreview: chatData?.historyPreview,
|
||||
runningTime: +responseData.reduce((sum, item) => sum + (item.runningTime || 0), 0).toFixed(2)
|
||||
};
|
||||
|
@@ -63,6 +63,7 @@ import styles from './index.module.scss';
|
||||
import Script from 'next/script';
|
||||
import { postQuestionGuide } from '@/api/core/ai/agent/api';
|
||||
import { splitGuideModule } from './utils';
|
||||
import { DatasetSpecialIdEnum } from '@fastgpt/core/dataset/constant';
|
||||
|
||||
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 24);
|
||||
|
||||
@@ -511,6 +512,12 @@ const ChatBox = (
|
||||
|
||||
// add guide text listener
|
||||
useEffect(() => {
|
||||
const windowMessage = ({ data }: MessageEvent<{ type: 'sendPrompt'; text: string }>) => {
|
||||
if (data?.type === 'sendPrompt' && data?.text) {
|
||||
handleSubmit((item) => sendPrompt(item, data.text))();
|
||||
}
|
||||
};
|
||||
window.addEventListener('message', windowMessage);
|
||||
event.on('guideClick', ({ text }: { text: string }) => {
|
||||
if (!text) return;
|
||||
handleSubmit((data) => sendPrompt(data, text))();
|
||||
@@ -518,6 +525,7 @@ const ChatBox = (
|
||||
|
||||
return () => {
|
||||
event.off('guideClick');
|
||||
window.removeEventListener('message', windowMessage);
|
||||
};
|
||||
}, [handleSubmit, sendPrompt]);
|
||||
|
||||
@@ -995,7 +1003,8 @@ const ChatBox = (
|
||||
defaultValues={{
|
||||
dataId: adminMarkData.dataId,
|
||||
q: adminMarkData.q,
|
||||
a: adminMarkData.a
|
||||
a: adminMarkData.a,
|
||||
file_id: DatasetSpecialIdEnum.mark
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
|
@@ -3,15 +3,6 @@ export enum UserAuthTypeEnum {
|
||||
findPassword = 'findPassword'
|
||||
}
|
||||
|
||||
export const fileImgs = [
|
||||
{ suffix: 'pdf', src: '/imgs/files/pdf.svg' },
|
||||
{ suffix: 'csv', src: '/imgs/files/csv.svg' },
|
||||
{ suffix: '(doc|docs)', src: '/imgs/files/doc.svg' },
|
||||
{ suffix: 'txt', src: '/imgs/files/txt.svg' },
|
||||
{ suffix: 'md', src: '/imgs/files/markdown.svg' },
|
||||
{ suffix: '.', src: '/imgs/files/file.svg' }
|
||||
];
|
||||
|
||||
export enum TrackEventName {
|
||||
windowError = 'windowError',
|
||||
pageError = 'pageError',
|
||||
|
@@ -34,4 +34,3 @@ export const KbTypeMap = {
|
||||
};
|
||||
|
||||
export const FolderAvatarSrc = '/imgs/files/folder.svg';
|
||||
export const OtherFileId = 'other';
|
||||
|
109
projects/app/src/pages/api/admin/initv447.ts
Normal file
109
projects/app/src/pages/api/admin/initv447.ts
Normal file
@@ -0,0 +1,109 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { authUser } from '@/service/utils/auth';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { PgClient } from '@/service/pg';
|
||||
import { PgDatasetTableName } from '@/constants/plugin';
|
||||
import { DatasetSpecialIdEnum } from '@fastgpt/core/dataset/constant';
|
||||
import mongoose, { Types } from 'mongoose';
|
||||
import { delay } from '@/utils/tools';
|
||||
|
||||
/**
 * One-off v4.4.7 migration endpoint (requires root auth).
 *
 * Steps:
 * 1. Widen the `source` / `file_id` columns and create an index on `file_id`.
 * 2. Collect every distinct non-empty `file_id` and, in batches of `limit`,
 *    check whether a matching document exists in `dataset.files`.
 * 3. Rewrite rows whose `file_id` has no matching file to the `manual` id.
 * 4. Rewrite rows with a NULL / empty `file_id` to the `manual` id using
 *    `limit` interleaved background workers.
 *
 * Request body: `{ limit?: number }` - batch size and worker count. Values
 * that are not a positive integer fall back to 100, because the loops below
 * advance by `limit` and would never terminate with 0.
 *
 * Response: `{ empty: number }` - the number of rows that had an empty
 * `file_id` when step 4 started. Step 4 keeps running after the response is
 * sent; progress and failures are reported through the server log.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  const initFileIds: string[] = [];
  try {
    const requestedLimit = Number(req.body?.limit ?? 100);
    const limit = Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : 100;
    await connectToDatabase();
    await authUser({ req, authRoot: true });

    console.log('add index');
    await PgClient.query(
      `
      ALTER TABLE modeldata
      ALTER COLUMN source TYPE VARCHAR(256),
      ALTER COLUMN file_id TYPE VARCHAR(256);
      CREATE INDEX IF NOT EXISTS modelData_fileId_index ON modeldata (file_id);
    `
    );
    console.log('index success');
    console.log('count rows');
    // Fetch the distinct, non-empty file_id values.
    const { rows } = await PgClient.query(`SELECT DISTINCT file_id
    FROM ${PgDatasetTableName} WHERE file_id IS NOT NULL AND file_id != '';
    `);
    console.log('count rows success', rows.length);
    console.log('start filter');
    // init() appends to initFileIds every file_id that has no stored file.
    for (let i = 0; i < rows.length; i += limit) {
      await init(rows.slice(i, i + limit), initFileIds);
      console.log(i);
    }
    console.log('filter success');
    console.log('start update');

    for (let i = 0; i < initFileIds.length; i++) {
      await PgClient.query(`UPDATE ${PgDatasetTableName}
      SET file_id = '${DatasetSpecialIdEnum.manual}'
      WHERE file_id = '${initFileIds[i]}'`);
      console.log('update: ', initFileIds[i]);
    }

    const { rows: emptyIds } = await PgClient.query(
      `SELECT id FROM ${PgDatasetTableName} WHERE file_id IS NULL OR file_id=''`
    );
    console.log(emptyIds.length);

    await delay(5000);

    // Worker `start` handles rows start, start + limit, start + 2 * limit, ...
    async function start(start: number) {
      for (let i = start; i < emptyIds.length; i += limit) {
        await PgClient.query(`UPDATE ${PgDatasetTableName}
        SET file_id = '${DatasetSpecialIdEnum.manual}'
        WHERE id = '${emptyIds[i].id}'`);
        console.log('update: ', i, emptyIds[i].id);
      }
    }
    // The workers are intentionally not awaited so the request can return, but
    // each one gets a rejection handler so a failed UPDATE is logged instead of
    // surfacing as an unhandled promise rejection.
    for (let i = 0; i < limit; i++) {
      start(i).catch((error) => {
        console.error('update empty file_id error: ', i, error);
      });
    }

    // await PgClient.query(
    //   `UPDATE ${PgDatasetTableName}
    //   SET file_id = '${DatasetSpecialIdEnum.manual}'
    //   WHERE file_id IS NULL OR file_id = ''`
    // );

    // NOTE(review): logged as soon as the workers are scheduled, not when they finish.
    console.log('update success');

    jsonRes(res, {
      data: {
        empty: emptyIds.length
      }
    });
  } catch (error) {
    jsonRes(res, {
      code: 500,
      error
    });
  }
}
|
||||
|
||||
/**
 * Check one batch of `file_id` rows against the `dataset.files` collection.
 *
 * Every `file_id` with no matching document is appended to `initFileIds`, so
 * the caller can later rewrite those rows to the `manual` id. Lookups run
 * concurrently; a lookup that rejects is ignored and its row is left untouched
 * (presumably this covers ids that are not valid ObjectIds - TODO confirm).
 *
 * @param rows Rows carrying a `file_id` field.
 * @param initFileIds Output list, mutated in place.
 */
async function init(rows: any[], initFileIds: string[]) {
  const files = mongoose.connection.db.collection(`dataset.files`);

  // Resolves to '' when the file exists, otherwise records and returns the orphaned id.
  const findOrphanId = async (row: any) => {
    const matched = await files.findOne({
      _id: new Types.ObjectId(row.file_id)
    });
    if (matched) return '';

    initFileIds.push(row.file_id);
    return row.file_id;
  };

  const settled = await Promise.allSettled(rows.map((row) => findOrphanId(row)));

  // Log how many ids in this batch were found to be orphaned.
  const orphanCount = settled.filter(
    (result) => result.status === 'fulfilled' && result.value
  ).length;
  console.log(orphanCount);
}
|
@@ -91,6 +91,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
if (res.closed) {
|
||||
return stream.destroy();
|
||||
}
|
||||
q = q.replace(/"/g, '""');
|
||||
a = a.replace(/"/g, '""');
|
||||
source = source?.replace(/"/g, '""');
|
||||
|
||||
write(`\n"${q}","${a || ''}","${source || ''}"`);
|
||||
});
|
||||
// finish
|
||||
|
@@ -4,7 +4,6 @@ import { connectToDatabase } from '@/service/mongo';
|
||||
import { authUser } from '@/service/utils/auth';
|
||||
import { PgClient } from '@/service/pg';
|
||||
import { PgDatasetTableName } from '@/constants/plugin';
|
||||
import { OtherFileId } from '@/constants/dataset';
|
||||
import type { PgDataItemType } from '@/types/core/dataset/data';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
@@ -36,15 +35,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
['user_id', userId],
|
||||
'AND',
|
||||
['kb_id', kbId],
|
||||
...(fileId
|
||||
? fileId === OtherFileId
|
||||
? ["AND (file_id IS NULL OR file_id = '')"]
|
||||
: ['AND', ['file_id', fileId]]
|
||||
: []),
|
||||
'AND',
|
||||
['file_id', fileId],
|
||||
...(searchText
|
||||
? [
|
||||
'AND',
|
||||
`(q LIKE '%${searchText}%' OR a LIKE '%${searchText}%' OR source LIKE '%${searchText}%')`
|
||||
`(q ILIKE '%${searchText}%' OR a ILIKE '%${searchText}%' OR source ILIKE '%${searchText}%')`
|
||||
]
|
||||
: [])
|
||||
];
|
||||
|
@@ -1,3 +1,7 @@
|
||||
/*
|
||||
insert one data to dataset (immediately insert)
|
||||
manual input or mark data
|
||||
*/
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
@@ -11,7 +15,6 @@ import { DatasetDataItemType } from '@/types/core/dataset/data';
|
||||
import { countPromptTokens } from '@/utils/common/tiktoken';
|
||||
|
||||
export type Props = {
|
||||
billId?: string;
|
||||
kbId: string;
|
||||
data: DatasetDataItemType;
|
||||
};
|
||||
@@ -40,7 +43,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
export async function getVectorAndInsertDataset(
|
||||
props: Props & { userId: string }
|
||||
): Promise<string> {
|
||||
const { kbId, data, userId, billId } = props;
|
||||
const { kbId, data, userId } = props;
|
||||
if (!kbId || !data?.q) {
|
||||
return Promise.reject('缺少参数');
|
||||
}
|
||||
@@ -61,7 +64,7 @@ export async function getVectorAndInsertDataset(
|
||||
const { rows: existsRows } = await PgClient.query(`
|
||||
SELECT COUNT(*) > 0 AS exists
|
||||
FROM ${PgDatasetTableName}
|
||||
WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND user_id='${userId}' AND kb_id='${kbId}'
|
||||
WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND user_id='${userId}' AND file_id='${data.file_id}' AND kb_id='${kbId}'
|
||||
`);
|
||||
const exists = existsRows[0]?.exists || false;
|
||||
|
||||
@@ -72,8 +75,7 @@ export async function getVectorAndInsertDataset(
|
||||
const { vectors } = await getVector({
|
||||
model: kb.vectorModel,
|
||||
input: [q],
|
||||
userId,
|
||||
billId
|
||||
userId
|
||||
});
|
||||
|
||||
const response = await insertData2Dataset({
|
||||
|
@@ -6,7 +6,7 @@ import { GridFSStorage } from '@/service/lib/gridfs';
|
||||
import { PgClient } from '@/service/pg';
|
||||
import { PgDatasetTableName } from '@/constants/plugin';
|
||||
import { Types } from 'mongoose';
|
||||
import { OtherFileId } from '@/constants/dataset';
|
||||
import { isSpecialFileId } from '@fastgpt/core/dataset/utils';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -22,14 +22,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
const { userId } = await authUser({ req, authToken: true });
|
||||
|
||||
// other data. Delete only vector data
|
||||
if (fileId === OtherFileId) {
|
||||
if (isSpecialFileId(fileId)) {
|
||||
await PgClient.delete(PgDatasetTableName, {
|
||||
where: [
|
||||
['user_id', userId],
|
||||
'AND',
|
||||
['kb_id', kbId],
|
||||
"AND (file_id IS NULL OR file_id = '')"
|
||||
]
|
||||
where: [['user_id', userId], 'AND', ['kb_id', kbId], 'AND', ['file_id', fileId]]
|
||||
});
|
||||
} else {
|
||||
// auth file
|
||||
|
@@ -3,8 +3,12 @@ import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authUser } from '@/service/utils/auth';
|
||||
import { GridFSStorage } from '@/service/lib/gridfs';
|
||||
import { OtherFileId } from '@/constants/dataset';
|
||||
import { datasetSpecialIdMap } from '@fastgpt/core/dataset/constant';
|
||||
import { datasetSpecialIds } from '@fastgpt/core/dataset/constant';
|
||||
import type { GSFileInfoType } from '@/types/common/file';
|
||||
import { strIsLink } from '@fastgpt/common/tools/str';
|
||||
import { PgClient } from '@/service/pg';
|
||||
import { PgDatasetTableName } from '@/constants/plugin';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -14,12 +18,32 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
// 凭证校验
|
||||
const { userId } = await authUser({ req, authToken: true });
|
||||
|
||||
if (fileId === OtherFileId) {
|
||||
// manual, mark
|
||||
if (datasetSpecialIds.includes(fileId)) {
|
||||
return jsonRes<GSFileInfoType>(res, {
|
||||
data: {
|
||||
id: OtherFileId,
|
||||
id: fileId,
|
||||
size: 0,
|
||||
filename: 'kb.Other Data',
|
||||
// @ts-ignore
|
||||
filename: datasetSpecialIdMap[fileId]?.name || fileId,
|
||||
uploadDate: new Date(),
|
||||
encoding: '',
|
||||
contentType: ''
|
||||
}
|
||||
});
|
||||
}
|
||||
// link file
|
||||
if (strIsLink(fileId)) {
|
||||
const { rows } = await PgClient.select(PgDatasetTableName, {
|
||||
where: [['user_id', userId], 'AND', ['file_id', fileId]],
|
||||
limit: 1,
|
||||
fields: ['source']
|
||||
});
|
||||
return jsonRes<GSFileInfoType>(res, {
|
||||
data: {
|
||||
id: fileId,
|
||||
size: 0,
|
||||
filename: rows[0]?.source || fileId,
|
||||
uploadDate: new Date(),
|
||||
encoding: '',
|
||||
contentType: ''
|
||||
|
@@ -5,7 +5,14 @@ import { authUser } from '@/service/utils/auth';
|
||||
import { GridFSStorage } from '@/service/lib/gridfs';
|
||||
import { PgClient } from '@/service/pg';
|
||||
import { PgDatasetTableName } from '@/constants/plugin';
|
||||
import { FileStatusEnum, OtherFileId } from '@/constants/dataset';
|
||||
import { FileStatusEnum } from '@/constants/dataset';
|
||||
import { strIsLink } from '@fastgpt/common/tools/str';
|
||||
import {
|
||||
DatasetSpecialIdEnum,
|
||||
datasetSpecialIdMap,
|
||||
datasetSpecialIds
|
||||
} from '@fastgpt/core/dataset/constant';
|
||||
import { Types } from 'mongoose';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -22,57 +29,106 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
// 凭证校验
|
||||
const { userId } = await authUser({ req, authToken: true });
|
||||
|
||||
// select and count same file_id data, exclude special id
|
||||
const pgWhere = `user_id = '${userId}' AND kb_id = '${kbId}' ${datasetSpecialIds
|
||||
.map((item) => `AND file_id!='${item}'`)
|
||||
.join(' ')}
|
||||
${searchText ? `AND source ILIKE '%${searchText}%'` : ''}`;
|
||||
|
||||
const [{ rows }, { rowCount: total }] = await Promise.all([
|
||||
PgClient.query(`SELECT file_id, COUNT(*) AS count
|
||||
FROM ${PgDatasetTableName}
|
||||
where ${pgWhere}
|
||||
GROUP BY file_id
|
||||
ORDER BY file_id DESC
|
||||
LIMIT ${pageSize} OFFSET ${(pageNum - 1) * pageSize};
|
||||
`),
|
||||
PgClient.query(`SELECT DISTINCT file_id
|
||||
FROM ${PgDatasetTableName}
|
||||
where ${pgWhere}
|
||||
`)
|
||||
]);
|
||||
|
||||
// find files
|
||||
const gridFs = new GridFSStorage('dataset', userId);
|
||||
const collection = gridFs.Collection();
|
||||
|
||||
const mongoWhere = {
|
||||
['metadata.kbId']: kbId,
|
||||
['metadata.userId']: userId,
|
||||
['metadata.datasetUsed']: true,
|
||||
...(searchText && { filename: { $regex: searchText } })
|
||||
async function getSpecialData() {
|
||||
if (pageNum !== 1) return [];
|
||||
return [
|
||||
{
|
||||
id: DatasetSpecialIdEnum.manual,
|
||||
size: 0,
|
||||
filename: datasetSpecialIdMap[DatasetSpecialIdEnum.manual].name,
|
||||
uploadTime: new Date(),
|
||||
status: FileStatusEnum.ready,
|
||||
chunkLength: await PgClient.count(PgDatasetTableName, {
|
||||
fields: ['id'],
|
||||
where: [
|
||||
['user_id', userId],
|
||||
'AND',
|
||||
['file_id', DatasetSpecialIdEnum.manual],
|
||||
'AND',
|
||||
['kb_id', kbId]
|
||||
]
|
||||
})
|
||||
},
|
||||
{
|
||||
id: DatasetSpecialIdEnum.mark,
|
||||
size: 0,
|
||||
filename: datasetSpecialIdMap[DatasetSpecialIdEnum.mark].name,
|
||||
uploadTime: new Date(),
|
||||
status: FileStatusEnum.ready,
|
||||
chunkLength: await PgClient.count(PgDatasetTableName, {
|
||||
fields: ['id'],
|
||||
where: [
|
||||
['user_id', userId],
|
||||
'AND',
|
||||
['file_id', DatasetSpecialIdEnum.mark],
|
||||
'AND',
|
||||
['kb_id', kbId]
|
||||
]
|
||||
})
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
const data = await Promise.all([
|
||||
getSpecialData(),
|
||||
...rows.map(async (row) => {
|
||||
// link data
|
||||
if (strIsLink(row.file_id)) {
|
||||
const { rows } = await PgClient.select(PgDatasetTableName, {
|
||||
where: [['user_id', userId], 'AND', ['file_id', row.file_id]],
|
||||
limit: 1,
|
||||
fields: ['source']
|
||||
});
|
||||
return {
|
||||
id: row.file_id,
|
||||
size: 0,
|
||||
filename: rows[0]?.source || row.file_id,
|
||||
uploadTime: new Date(),
|
||||
status: FileStatusEnum.ready,
|
||||
chunkLength: row.count
|
||||
};
|
||||
const [files, total] = await Promise.all([
|
||||
collection
|
||||
.find(mongoWhere, {
|
||||
}
|
||||
// file data
|
||||
const file = await collection.findOne(
|
||||
{
|
||||
_id: new Types.ObjectId(row.file_id),
|
||||
['metadata.userId']: userId,
|
||||
['metadata.kbId']: kbId
|
||||
},
|
||||
{
|
||||
projection: {
|
||||
_id: 1,
|
||||
filename: 1,
|
||||
uploadDate: 1,
|
||||
length: 1
|
||||
}
|
||||
})
|
||||
.skip((pageNum - 1) * pageSize)
|
||||
.limit(pageSize)
|
||||
.sort({ uploadDate: -1 })
|
||||
.toArray(),
|
||||
collection.countDocuments(mongoWhere)
|
||||
]);
|
||||
|
||||
async function GetOtherData() {
|
||||
return {
|
||||
id: OtherFileId,
|
||||
size: 0,
|
||||
filename: 'kb.Other Data',
|
||||
uploadTime: new Date(),
|
||||
status: (await TrainingData.findOne({ userId, kbId, file_id: '' }))
|
||||
? FileStatusEnum.embedding
|
||||
: FileStatusEnum.ready,
|
||||
chunkLength: await PgClient.count(PgDatasetTableName, {
|
||||
fields: ['id'],
|
||||
where: [
|
||||
['user_id', userId],
|
||||
'AND',
|
||||
['kb_id', kbId],
|
||||
"AND (file_id IS NULL OR file_id = '')"
|
||||
]
|
||||
})
|
||||
};
|
||||
}
|
||||
|
||||
const data = await Promise.all([
|
||||
GetOtherData(),
|
||||
...files.map(async (file) => {
|
||||
);
|
||||
if (!file) return null;
|
||||
return {
|
||||
id: String(file._id),
|
||||
size: file.length,
|
||||
@@ -81,16 +137,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
status: (await TrainingData.findOne({ userId, kbId, file_id: file._id }))
|
||||
? FileStatusEnum.embedding
|
||||
: FileStatusEnum.ready,
|
||||
chunkLength: await PgClient.count(PgDatasetTableName, {
|
||||
fields: ['id'],
|
||||
where: [
|
||||
['user_id', userId],
|
||||
'AND',
|
||||
['kb_id', kbId],
|
||||
'AND',
|
||||
['file_id', String(file._id)]
|
||||
]
|
||||
})
|
||||
chunkLength: row.count
|
||||
};
|
||||
})
|
||||
]);
|
||||
@@ -99,7 +146,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
data: {
|
||||
pageNum,
|
||||
pageSize,
|
||||
data: data.flat(),
|
||||
data: data.flat().filter((item) => item),
|
||||
total
|
||||
}
|
||||
});
|
||||
|
@@ -8,6 +8,7 @@ import { Types } from 'mongoose';
|
||||
import { PgClient } from '@/service/pg';
|
||||
import { PgDatasetTableName } from '@/constants/plugin';
|
||||
import { addLog } from '@/service/utils/tools';
|
||||
import { strIsLink } from '@fastgpt/common/tools/str';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -19,6 +20,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
const gridFs = new GridFSStorage('dataset', userId);
|
||||
const collection = gridFs.Collection();
|
||||
|
||||
if (id.length === 24 && !strIsLink(id)) {
|
||||
await collection.findOneAndUpdate(
|
||||
{
|
||||
_id: new Types.ObjectId(id)
|
||||
@@ -30,9 +32,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
// data source
|
||||
updateDatasetSource({
|
||||
await updateDatasetSource({
|
||||
fileId: id,
|
||||
userId,
|
||||
name
|
||||
|
@@ -1,31 +1,28 @@
|
||||
import React, { useCallback, useState, useRef, useMemo } from 'react';
|
||||
import { Box, Card, IconButton, Flex, Grid, Image } from '@chakra-ui/react';
|
||||
import { Box, Card, IconButton, Flex, Grid, Image, Button } from '@chakra-ui/react';
|
||||
import type { PgDataItemType } from '@/types/core/dataset/data';
|
||||
import { usePagination } from '@/hooks/usePagination';
|
||||
import {
|
||||
getDatasetDataList,
|
||||
delOneDatasetDataById,
|
||||
getTrainingData
|
||||
} from '@/api/core/dataset/data';
|
||||
import { getDatasetDataList, delOneDatasetDataById } from '@/api/core/dataset/data';
|
||||
import { getFileInfoById } from '@/api/core/dataset/file';
|
||||
import { DeleteIcon, RepeatIcon } from '@chakra-ui/icons';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import InputModal, { FormData as InputDataType } from './InputDataModal';
|
||||
import InputModal, { FormData as InputDataType, RawFileText } from './InputDataModal';
|
||||
import { debounce } from 'lodash';
|
||||
import { getErrText } from '@/utils/tools';
|
||||
import { useConfirm } from '@/hooks/useConfirm';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useRouter } from 'next/router';
|
||||
import MyIcon from '@/components/Icon';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import MyInput from '@/components/MyInput';
|
||||
import { fileImgs } from '@/constants/common';
|
||||
import { useLoading } from '@/hooks/useLoading';
|
||||
import { getFileIcon, getSpecialFileIcon } from '@fastgpt/common/tools/file';
|
||||
|
||||
const DataCard = ({ kbId }: { kbId: string }) => {
|
||||
const BoxRef = useRef<HTMLDivElement>(null);
|
||||
const lastSearch = useRef('');
|
||||
const router = useRouter();
|
||||
const { Loading, setIsLoading } = useLoading({ defaultLoading: true });
|
||||
const { fileId = '' } = router.query as { fileId: string };
|
||||
const { t } = useTranslation();
|
||||
const [searchText, setSearchText] = useState('');
|
||||
@@ -37,7 +34,6 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
||||
|
||||
const {
|
||||
data: kbDataList,
|
||||
isLoading,
|
||||
Pagination,
|
||||
total,
|
||||
getData,
|
||||
@@ -52,6 +48,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
||||
fileId
|
||||
},
|
||||
onChange() {
|
||||
setIsLoading(false);
|
||||
if (BoxRef.current) {
|
||||
BoxRef.current.scrollTop = 0;
|
||||
}
|
||||
@@ -72,9 +69,8 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
||||
// get file info
|
||||
const { data: fileInfo } = useQuery(['getFileInfo', fileId], () => getFileInfoById(fileId));
|
||||
const fileIcon = useMemo(
|
||||
() =>
|
||||
fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(fileInfo?.filename || ''))?.src,
|
||||
[fileInfo?.filename]
|
||||
() => getSpecialFileIcon(fileInfo?.id) || getFileIcon(fileInfo?.filename),
|
||||
[fileInfo?.filename, fileInfo?.id]
|
||||
);
|
||||
|
||||
return (
|
||||
@@ -82,10 +78,9 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
||||
<Flex alignItems={'center'}>
|
||||
<IconButton
|
||||
mr={3}
|
||||
icon={<MyIcon name={'backFill'} w={'18px'} color={'myBlue.600'} />}
|
||||
icon={<MyIcon name={'backFill'} w={['14px', '18px']} color={'myBlue.600'} />}
|
||||
bg={'white'}
|
||||
boxShadow={'1px 1px 9px rgba(0,0,0,0.15)'}
|
||||
h={'28px'}
|
||||
size={'sm'}
|
||||
borderRadius={'50%'}
|
||||
aria-label={''}
|
||||
@@ -98,30 +93,34 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
||||
})
|
||||
}
|
||||
/>
|
||||
<Flex
|
||||
className="textEllipsis"
|
||||
flex={'1 0 0'}
|
||||
mr={[3, 5]}
|
||||
fontSize={['sm', 'md']}
|
||||
alignItems={'center'}
|
||||
>
|
||||
<Flex className="textEllipsis" flex={'1 0 0'} mr={[3, 5]} alignItems={'center'}>
|
||||
<Image src={fileIcon || '/imgs/files/file.svg'} w={'16px'} mr={2} alt={''} />
|
||||
{t(fileInfo?.filename || 'Filename')}
|
||||
<RawFileText
|
||||
filename={fileInfo?.filename}
|
||||
fileId={fileInfo?.id}
|
||||
fontSize={['md', 'lg']}
|
||||
color={'black'}
|
||||
textDecoration={'none'}
|
||||
/>
|
||||
</Flex>
|
||||
<Box>
|
||||
<MyTooltip label={'刷新'}>
|
||||
<IconButton
|
||||
icon={<RepeatIcon />}
|
||||
size={['sm', 'md']}
|
||||
aria-label={'refresh'}
|
||||
<Button
|
||||
ml={2}
|
||||
variant={'base'}
|
||||
isLoading={isLoading}
|
||||
size={['sm', 'md']}
|
||||
onClick={() => {
|
||||
getData(pageNum);
|
||||
getTrainingData({ kbId, init: true });
|
||||
if (!fileInfo) return;
|
||||
setEditInputData({
|
||||
dataId: '',
|
||||
q: '',
|
||||
a: '',
|
||||
source: fileInfo.filename,
|
||||
file_id: fileInfo.id
|
||||
});
|
||||
}}
|
||||
/>
|
||||
</MyTooltip>
|
||||
>
|
||||
{t('kb.Insert Data')}
|
||||
</Button>
|
||||
</Box>
|
||||
</Flex>
|
||||
<Flex my={3} alignItems={'center'}>
|
||||
@@ -249,6 +248,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
||||
/>
|
||||
)}
|
||||
<ConfirmModal />
|
||||
<Loading fixed={false} />
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
@@ -22,16 +22,17 @@ import { useTranslation } from 'react-i18next';
|
||||
import MyIcon from '@/components/Icon';
|
||||
import MyInput from '@/components/MyInput';
|
||||
import dayjs from 'dayjs';
|
||||
import { fileImgs } from '@/constants/common';
|
||||
import { useRequest } from '@/hooks/useRequest';
|
||||
import { useLoading } from '@/hooks/useLoading';
|
||||
import { FileStatusEnum, OtherFileId } from '@/constants/dataset';
|
||||
import { FileStatusEnum } from '@/constants/dataset';
|
||||
import { useRouter } from 'next/router';
|
||||
import { usePagination } from '@/hooks/usePagination';
|
||||
import type { DatasetFileItemType } from '@/types/core/dataset/file';
|
||||
import { useGlobalStore } from '@/store/global';
|
||||
import MyMenu from '@/components/MyMenu';
|
||||
import { useEditTitle } from '@/hooks/useEditTitle';
|
||||
import { datasetSpecialIds } from '@fastgpt/core/dataset/constant';
|
||||
import { getFileIcon, getSpecialFileIcon } from '@fastgpt/common/tools/file';
|
||||
|
||||
const FileCard = ({ kbId }: { kbId: string }) => {
|
||||
const BoxRef = useRef<HTMLDivElement>(null);
|
||||
@@ -79,10 +80,14 @@ const FileCard = ({ kbId }: { kbId: string }) => {
|
||||
// add file icon
|
||||
const formatFiles = useMemo(
|
||||
() =>
|
||||
files.map((file) => ({
|
||||
files.map((file) => {
|
||||
const icon = getSpecialFileIcon(file.id) || getFileIcon(file.filename);
|
||||
|
||||
return {
|
||||
...file,
|
||||
icon: fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(file.filename))?.src
|
||||
})),
|
||||
icon
|
||||
};
|
||||
}),
|
||||
[files]
|
||||
);
|
||||
|
||||
@@ -114,8 +119,8 @@ const FileCard = ({ kbId }: { kbId: string }) => {
|
||||
onSettled() {
|
||||
setLoading(false);
|
||||
},
|
||||
successToast: t('common.Delete Success'),
|
||||
errorToast: t('common.Delete Failed')
|
||||
successToast: t('common.Rename Success'),
|
||||
errorToast: t('common.Rename Failed')
|
||||
});
|
||||
|
||||
const { onOpenModal, EditModal: EditTitleModal } = useEditTitle({
|
||||
@@ -135,11 +140,15 @@ const FileCard = ({ kbId }: { kbId: string }) => {
|
||||
|
||||
// training data
|
||||
const { data: { qaListLen = 0, vectorListLen = 0 } = {}, refetch: refetchTrainingData } =
|
||||
useQuery(['getModelSplitDataList', kbId], () => getTrainingData({ kbId, init: false }), {
|
||||
useQuery(
|
||||
['getModelSplitDataList', kbId],
|
||||
() => getTrainingData({ kbId, init: Math.random() > 0.7 }),
|
||||
{
|
||||
onError(err) {
|
||||
console.log(err);
|
||||
}
|
||||
});
|
||||
}
|
||||
);
|
||||
|
||||
useQuery(
|
||||
['refetchTrainingData', kbId],
|
||||
@@ -279,7 +288,7 @@ const FileCard = ({ kbId }: { kbId: string }) => {
|
||||
</MenuButton>
|
||||
}
|
||||
menuList={[
|
||||
...(file.id !== OtherFileId
|
||||
...(!datasetSpecialIds.includes(file.id)
|
||||
? [
|
||||
{
|
||||
child: (
|
||||
|
@@ -3,13 +3,14 @@ import { useTranslation } from 'next-i18next';
|
||||
import MyModal from '@/components/MyModal';
|
||||
import { Box, Input, Textarea, ModalBody, ModalFooter, Button } from '@chakra-ui/react';
|
||||
import { useForm } from 'react-hook-form';
|
||||
import { useRequest } from '@/hooks/useRequest';
|
||||
|
||||
const CreateFileModal = ({
|
||||
onClose,
|
||||
onSuccess
|
||||
}: {
|
||||
onClose: () => void;
|
||||
onSuccess: (e: { filename: string; content: string }) => void;
|
||||
onSuccess: (e: { filename: string; content: string }) => Promise<void>;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const { register, handleSubmit } = useForm({
|
||||
@@ -19,6 +20,13 @@ const CreateFileModal = ({
|
||||
}
|
||||
});
|
||||
|
||||
const { mutate, isLoading } = useRequest({
|
||||
mutationFn: () => handleSubmit(onSuccess)(),
|
||||
onSuccess: () => {
|
||||
onClose();
|
||||
}
|
||||
});
|
||||
|
||||
return (
|
||||
<MyModal title={t('file.Create File')} isOpen w={'600px'} top={'15vh'}>
|
||||
<ModalBody>
|
||||
@@ -47,12 +55,7 @@ const CreateFileModal = ({
|
||||
<Button variant={'base'} mr={4} onClick={onClose}>
|
||||
取消
|
||||
</Button>
|
||||
<Button
|
||||
onClick={() => {
|
||||
handleSubmit(onSuccess)();
|
||||
onClose();
|
||||
}}
|
||||
>
|
||||
<Button isLoading={isLoading} onClick={mutate}>
|
||||
确认
|
||||
</Button>
|
||||
</ModalFooter>
|
||||
|
@@ -12,7 +12,6 @@ import {
|
||||
readDocContent
|
||||
} from '@/utils/web/file';
|
||||
import { Box, Flex, useDisclosure, type BoxProps } from '@chakra-ui/react';
|
||||
import { fileImgs } from '@/constants/common';
|
||||
import { DragEvent, useCallback, useState } from 'react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { customAlphabet } from 'nanoid';
|
||||
@@ -22,12 +21,13 @@ import { FetchResultItem } from '@/types/plugin';
|
||||
import type { DatasetDataItemType } from '@/types/core/dataset/data';
|
||||
import { getErrText } from '@/utils/tools';
|
||||
import { useDatasetStore } from '@/store/dataset';
|
||||
import { getFileIcon } from '@fastgpt/common/tools/file';
|
||||
|
||||
const UrlFetchModal = dynamic(() => import('./UrlFetchModal'));
|
||||
const CreateFileModal = dynamic(() => import('./CreateFileModal'));
|
||||
|
||||
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
|
||||
const csvTemplate = `index,content,source\n"被索引的内容","对应的答案。CSV 中请注意内容不能包含双引号,双引号是列分割符号","来源,可选。"\n"什么是 laf","laf 是一个云函数开发平台……",""\n"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……",""`;
|
||||
const csvTemplate = `index,content\n"被索引的内容","对应的答案。CSV 中请注意内容不能包含双引号,双引号是列分割符号"\n"什么是 laf","laf 是一个云函数开发平台……",""\n"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……"`;
|
||||
|
||||
export type FileItemType = {
|
||||
id: string;
|
||||
@@ -63,7 +63,7 @@ const FileSelect = ({
|
||||
|
||||
const { toast } = useToast();
|
||||
|
||||
const { File, onOpen } = useSelectFile({
|
||||
const { File: FileSelector, onOpen } = useSelectFile({
|
||||
fileType: fileExtension,
|
||||
multiple: true
|
||||
});
|
||||
@@ -92,11 +92,9 @@ const FileSelect = ({
|
||||
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
|
||||
|
||||
/* text file */
|
||||
const icon = fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(file.name))?.src;
|
||||
const icon = getFileIcon(file?.name);
|
||||
|
||||
if (!icon) {
|
||||
continue;
|
||||
}
|
||||
if (!icon) continue;
|
||||
|
||||
// parse and upload files
|
||||
let [text, filesId] = await Promise.all([
|
||||
@@ -165,7 +163,7 @@ const FileSelect = ({
|
||||
.map((item) => ({
|
||||
q: item[0] || '',
|
||||
a: item[1] || '',
|
||||
source: item[2] || file.name || '',
|
||||
source: file.name || '',
|
||||
file_id: filesId[0]
|
||||
}))
|
||||
};
|
||||
@@ -201,7 +199,8 @@ const FileSelect = ({
|
||||
chunks: splitRes.chunks.map((chunk) => ({
|
||||
q: chunk,
|
||||
a: '',
|
||||
source: url
|
||||
source: url,
|
||||
file_id: url
|
||||
}))
|
||||
};
|
||||
});
|
||||
@@ -210,15 +209,25 @@ const FileSelect = ({
|
||||
[chunkLen, onPushFiles]
|
||||
);
|
||||
const onCreateFile = useCallback(
|
||||
({ filename, content }: { filename: string; content: string }) => {
|
||||
async ({ filename, content }: { filename: string; content: string }) => {
|
||||
content = simpleText(content);
|
||||
|
||||
// create virtual txt file
|
||||
const txtBlob = new Blob([content], { type: 'text/plain' });
|
||||
const txtFile = new File([txtBlob], `${filename}.txt`, {
|
||||
type: txtBlob.type,
|
||||
lastModified: new Date().getTime()
|
||||
});
|
||||
const fileIds = await uploadFiles([txtFile], { kbId: kbDetail._id });
|
||||
|
||||
const splitRes = splitText2Chunks({
|
||||
text: content,
|
||||
maxLen: chunkLen
|
||||
});
|
||||
|
||||
onPushFiles([
|
||||
{
|
||||
id: nanoid(),
|
||||
id: fileIds[0],
|
||||
filename,
|
||||
icon: '/imgs/files/txt.svg',
|
||||
text: content,
|
||||
@@ -226,12 +235,13 @@ const FileSelect = ({
|
||||
chunks: splitRes.chunks.map((chunk) => ({
|
||||
q: chunk,
|
||||
a: '',
|
||||
source: filename
|
||||
source: filename,
|
||||
file_id: fileIds[0]
|
||||
}))
|
||||
}
|
||||
]);
|
||||
},
|
||||
[chunkLen, onPushFiles]
|
||||
[chunkLen, kbDetail._id, onPushFiles]
|
||||
);
|
||||
|
||||
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
|
||||
@@ -383,7 +393,7 @@ const FileSelect = ({
|
||||
{selectingText !== undefined && (
|
||||
<FileSelectLoading loading text={selectingText} fixed={false} />
|
||||
)}
|
||||
<File onSelect={onSelectFile} />
|
||||
<FileSelector onSelect={onSelectFile} />
|
||||
{isOpenUrlFetch && <UrlFetchModal onClose={onCloseUrlFetch} onSuccess={onUrlFetch} />}
|
||||
{isOpenCreateFile && <CreateFileModal onClose={onCloseCreateFile} onSuccess={onCreateFile} />}
|
||||
</Box>
|
||||
|
@@ -4,11 +4,11 @@ import { useForm } from 'react-hook-form';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { useRequest } from '@/hooks/useRequest';
|
||||
import { getErrText } from '@/utils/tools';
|
||||
import { postChunks2Dataset } from '@/api/core/dataset/data';
|
||||
import { TrainingModeEnum } from '@/constants/plugin';
|
||||
import { postData2Dataset } from '@/api/core/dataset/data';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import { QuestionOutlineIcon } from '@chakra-ui/icons';
|
||||
import { useDatasetStore } from '@/store/dataset';
|
||||
import { DatasetSpecialIdEnum, datasetSpecialIdMap } from '@fastgpt/core/dataset/constant';
|
||||
|
||||
type ManualFormType = { q: string; a: string };
|
||||
|
||||
@@ -33,23 +33,16 @@ const ManualImport = ({ kbId }: { kbId: string }) => {
|
||||
}
|
||||
|
||||
try {
|
||||
const data = {
|
||||
await postData2Dataset({
|
||||
kbId,
|
||||
data: {
|
||||
a: e.a,
|
||||
q: e.q,
|
||||
source: '手动录入'
|
||||
};
|
||||
const { insertLen } = await postChunks2Dataset({
|
||||
kbId,
|
||||
mode: TrainingModeEnum.index,
|
||||
data: [data]
|
||||
source: datasetSpecialIdMap[DatasetSpecialIdEnum.manual]?.sourceName,
|
||||
file_id: DatasetSpecialIdEnum.manual
|
||||
}
|
||||
});
|
||||
|
||||
if (insertLen === 0) {
|
||||
toast({
|
||||
title: '已存在完全一致的数据',
|
||||
status: 'warning'
|
||||
});
|
||||
} else {
|
||||
toast({
|
||||
title: '导入数据成功,需要一段时间训练',
|
||||
status: 'success'
|
||||
@@ -58,7 +51,6 @@ const ManualImport = ({ kbId }: { kbId: string }) => {
|
||||
a: '',
|
||||
q: ''
|
||||
});
|
||||
}
|
||||
} catch (err: any) {
|
||||
toast({
|
||||
title: getErrText(err, '出现了点意外~'),
|
||||
|
@@ -1,4 +1,4 @@
|
||||
import React, { useState, useCallback } from 'react';
|
||||
import React, { useState, useCallback, useMemo } from 'react';
|
||||
import { Box, Flex, Button, Textarea, IconButton, BoxProps } from '@chakra-ui/react';
|
||||
import { useForm } from 'react-hook-form';
|
||||
import {
|
||||
@@ -17,6 +17,9 @@ import { DatasetDataItemType } from '@/types/core/dataset/data';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useDatasetStore } from '@/store/dataset';
|
||||
import { getFileAndOpen } from '@/utils/web/file';
|
||||
import { datasetSpecialIdMap, datasetSpecialIds } from '@fastgpt/core/dataset/constant';
|
||||
import { strIsLink } from '@fastgpt/common/tools/str';
|
||||
import { useGlobalStore } from '@/store/global';
|
||||
|
||||
export type FormData = { dataId?: string } & DatasetDataItemType;
|
||||
|
||||
@@ -25,16 +28,13 @@ const InputDataModal = ({
|
||||
onSuccess,
|
||||
onDelete,
|
||||
kbId,
|
||||
defaultValues = {
|
||||
a: '',
|
||||
q: ''
|
||||
}
|
||||
defaultValues
|
||||
}: {
|
||||
onClose: () => void;
|
||||
onSuccess: (data: FormData) => void;
|
||||
onDelete?: () => void;
|
||||
kbId: string;
|
||||
defaultValues?: FormData;
|
||||
defaultValues: FormData;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const [loading, setLoading] = useState(false);
|
||||
@@ -64,10 +64,10 @@ const InputDataModal = ({
|
||||
|
||||
try {
|
||||
const data = {
|
||||
...e,
|
||||
dataId: '',
|
||||
a: e.a,
|
||||
q: e.q,
|
||||
source: '手动录入'
|
||||
// @ts-ignore
|
||||
source: e.source || datasetSpecialIdMap[e.file_id]?.sourceName
|
||||
};
|
||||
data.dataId = await postData2Dataset({
|
||||
kbId,
|
||||
@@ -79,6 +79,7 @@ const InputDataModal = ({
|
||||
status: 'success'
|
||||
});
|
||||
reset({
|
||||
...e,
|
||||
a: '',
|
||||
q: ''
|
||||
});
|
||||
@@ -103,9 +104,9 @@ const InputDataModal = ({
|
||||
setLoading(true);
|
||||
try {
|
||||
const data = {
|
||||
...e,
|
||||
dataId: e.dataId,
|
||||
kbId,
|
||||
a: e.a,
|
||||
q: e.q === defaultValues.q ? '' : e.q
|
||||
};
|
||||
await putDatasetDataById(data);
|
||||
@@ -259,31 +260,40 @@ interface RawFileTextProps extends BoxProps {
|
||||
export function RawFileText({ fileId, filename = '', ...props }: RawFileTextProps) {
|
||||
const { t } = useTranslation();
|
||||
const { toast } = useToast();
|
||||
const { setLoading } = useGlobalStore();
|
||||
|
||||
const hasFile = useMemo(() => fileId && !datasetSpecialIds.includes(fileId), [fileId]);
|
||||
|
||||
return (
|
||||
<MyTooltip label={fileId ? t('file.Click to view file') || '' : ''} shouldWrapChildren={false}>
|
||||
<MyTooltip label={hasFile ? t('file.Click to view file') || '' : ''} shouldWrapChildren={false}>
|
||||
<Box
|
||||
color={'myGray.600'}
|
||||
display={'inline-block'}
|
||||
whiteSpace={'nowrap'}
|
||||
{...(!!fileId
|
||||
{...(hasFile
|
||||
? {
|
||||
cursor: 'pointer',
|
||||
textDecoration: 'underline',
|
||||
onClick: async () => {
|
||||
if (strIsLink(fileId)) {
|
||||
return window.open(fileId, '_blank');
|
||||
}
|
||||
setLoading(true);
|
||||
try {
|
||||
await getFileAndOpen(fileId);
|
||||
await getFileAndOpen(fileId as string);
|
||||
} catch (error) {
|
||||
toast({
|
||||
title: getErrText(error, '获取文件地址失败'),
|
||||
status: 'error'
|
||||
});
|
||||
}
|
||||
setLoading(false);
|
||||
}
|
||||
}
|
||||
: {})}
|
||||
{...props}
|
||||
>
|
||||
{filename}
|
||||
{t(filename)}
|
||||
</Box>
|
||||
</MyTooltip>
|
||||
);
|
||||
|
@@ -179,8 +179,8 @@ export const insertData2Dataset = ({
|
||||
values: data.map((item) => [
|
||||
{ key: 'user_id', value: userId },
|
||||
{ key: 'kb_id', value: kbId },
|
||||
{ key: 'source', value: item.source?.slice(0, 60)?.trim() || '' },
|
||||
{ key: 'file_id', value: item.file_id || '' },
|
||||
{ key: 'source', value: item.source?.slice(0, 200)?.trim() || '' },
|
||||
{ key: 'file_id', value: item.file_id?.slice(0, 200)?.trim() || '' },
|
||||
{ key: 'q', value: item.q.replace(/'/g, '"') },
|
||||
{ key: 'a', value: item.a.replace(/'/g, '"') },
|
||||
{ key: 'vector', value: `[${item.vector}]` }
|
||||
@@ -198,13 +198,14 @@ export async function initPg() {
|
||||
vector VECTOR(1536) NOT NULL,
|
||||
user_id VARCHAR(50) NOT NULL,
|
||||
kb_id VARCHAR(50),
|
||||
source VARCHAR(100),
|
||||
file_id VARCHAR(100),
|
||||
source VARCHAR(256),
|
||||
file_id VARCHAR(256),
|
||||
q TEXT NOT NULL,
|
||||
a TEXT
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS modelData_userId_index ON ${PgDatasetTableName} USING HASH (user_id);
|
||||
CREATE INDEX IF NOT EXISTS modelData_kbId_index ON ${PgDatasetTableName} USING HASH (kb_id);
|
||||
CREATE INDEX IF NOT EXISTS modelData_kb_id_index ON ${PgDatasetTableName} (kb_id);
|
||||
CREATE INDEX IF NOT EXISTS modelData_fileId_index ON ${PgDatasetTableName} (file_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_model_data_md5_q_a_user_id_kb_id ON ${PgDatasetTableName} (md5(q), md5(a), user_id, kb_id);
|
||||
`);
|
||||
console.log('init pg successful');
|
||||
|
@@ -2,9 +2,9 @@ export type DatasetDataItemType = {
|
||||
q: string; // 提问词
|
||||
a: string; // 原文
|
||||
source?: string;
|
||||
file_id?: string;
|
||||
file_id: string;
|
||||
};
|
||||
|
||||
export type PgDataItemType = DatasetItemType & {
|
||||
export type PgDataItemType = DatasetDataItemType & {
|
||||
id: string;
|
||||
};
|
||||
|
Reference in New Issue
Block a user