Change embedding (#1463)

* rebuild embedding queue

* dataset menu

* feat: rebuild data api

* feat: ui change embedding model

* dataset ui

* feat: rebuild index ui

* rename collection
Archer
2024-05-13 14:51:42 +08:00
committed by GitHub
parent 59fd94384d
commit 80a84a5733
37 changed files with 1260 additions and 419 deletions


@@ -10,14 +10,19 @@
"scope": "javascript,typescript",
"prefix": "nextapi",
"body": [
"import type { NextApiRequest, NextApiResponse } from 'next';",
"import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';",
"import { NextAPI } from '@/service/middle/entry';",
"",
"type Props = {};",
"export type ${TM_FILENAME_BASE}Query = {};",
"",
"type Response = {};",
"export type ${TM_FILENAME_BASE}Body = {};",
"",
"async function handler(req: NextApiRequest, res: NextApiResponse<any>): Promise<Response> {",
"export type ${TM_FILENAME_BASE}Response = {};",
"",
"async function handler(",
" req: ApiRequestProps<getDatasetTrainingQueueBody, getDatasetTrainingQueueQuery>,",
" res: ApiResponseType<any>",
"): Promise<getDatasetTrainingQueueResponse> {",
" $1",
" return {}",
"}",
@@ -25,5 +30,30 @@
"export default NextAPI(handler);"
],
"description": "FastGPT Next API template"
},
"use context template": {
"scope": "typescriptreact",
"prefix": "context",
"body": [
"import { ReactNode } from 'react';",
"import { createContext } from 'use-context-selector';",
"",
"type ContextType = {$1};",
"",
"type ContextValueType = {};",
"",
"export const Context = createContext<ContextType>({});",
"",
"export const ContextProvider = ({",
" children,",
" value",
"}: {",
" children: ReactNode;",
" value: ContextValueType;",
"}) => {",
" return <Context.Provider value={value}>{children}</Context.Provider>;",
"};",
],
"description": "FastGPT usecontext template"
}
}


@@ -11,5 +11,6 @@
"i18n-ally.sortKeys": true,
"i18n-ally.keepFulfilled": false,
"i18n-ally.sourceLanguage": "zh", // 根据此语言文件翻译其他语言文件的变量和内容
"i18n-ally.displayLanguage": "zh" // 显示语言
"i18n-ally.displayLanguage": "zh", // 显示语言
"i18n-ally.extract.targetPickingStrategy": "most-similar-by-key"
}


@@ -1,5 +1,5 @@
---
title: 'V4.8 (in development)'
title: 'V4.8'
description: 'FastGPT V4.8 release notes'
icon: 'upgrade'
draft: false


@@ -0,0 +1,38 @@
---
title: 'V4.8.1 (in progress)'
description: 'FastGPT V4.8.1 release notes'
icon: 'upgrade'
draft: false
toc: true
weight: 825
---
## Initialization script
Send one HTTP request from any terminal, replacing {{rootkey}} with the `rootkey` from your environment variables and {{host}} with your FastGPT domain.
```bash
curl --location --request POST 'https://{{host}}/api/admin/initv481' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json'
```
Because the old collection names did not follow a consistent convention, this initialization renames those collections. Before initializing, make sure the dataset.trainings collection contains no data.
Ideally, pause all in-progress business operations while upgrading to this version, then run the initialization, to avoid data conflicts.
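For example, you can verify that the legacy collection is empty before running the script. A minimal sketch using the Node.js MongoDB driver (the connection string and env variable name are placeholders, not part of this release):
```typescript
import { MongoClient } from 'mongodb';

// Placeholder URI; point this at the same MongoDB instance FastGPT uses
const client = new MongoClient(process.env.MONGODB_URI || 'mongodb://localhost:27017/fastgpt');

async function checkTrainingsEmpty() {
  await client.connect();
  const count = await client.db().collection('dataset.trainings').countDocuments();
  console.log(`dataset.trainings currently holds ${count} documents`);
  await client.close();
}

checkTrainingsEmpty();
```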
## Run dirty-data cleanup
Send one HTTP request from any terminal, replacing {{rootkey}} with the `rootkey` from your environment variables and {{host}} with your FastGPT domain.
```bash
curl --location --request POST 'https://{{host}}/api/admin/clearInvalidData' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json'
```
Run this command after initialization finishes. The previous scheduled-cleanup timer had some issues and left part of the data uncleaned, so the cleanup can be triggered manually.
## V4.8.1 changelog
1. New: rebuild a knowledge base's index with a newly selected vector model
2. Fix: the scheduled dirty-data cleanup task


@@ -80,6 +80,7 @@ export type DatasetDataSchemaType = {
a: string; // answer or custom content
fullTextToken: string;
indexes: DatasetDataIndexItemType[];
rebuilding?: boolean;
};
export type DatasetTrainingSchemaType = {
@@ -95,6 +96,7 @@ export type DatasetTrainingSchemaType = {
mode: `${TrainingModeEnum}`;
model: string;
prompt: string;
dataId?: string;
q: string;
a: string;
chunkIndex: number;


@@ -1,6 +1,6 @@
export const TeamCollectionName = 'teams';
export const TeamMemberCollectionName = 'team.members';
export const TeamTagsCollectionName = 'team.tags';
export const TeamMemberCollectionName = 'team_members';
export const TeamTagsCollectionName = 'team_tags';
export enum TeamMemberRoleEnum {
owner = 'owner',


@@ -2,7 +2,7 @@ import { connectionMongo, type Model } from '../../mongo';
const { Schema, model, models } = connectionMongo;
import { RawTextBufferSchemaType } from './type';
export const collectionName = 'buffer.rawText';
export const collectionName = 'buffer_rawtexts';
const RawTextBufferSchema = new Schema({
sourceId: {


@@ -2,7 +2,7 @@ import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { TTSBufferSchemaType } from './type.d';
export const collectionName = 'buffer.tts';
export const collectionName = 'buffer_tts';
const TTSBufferSchema = new Schema({
bufferId: {


@@ -12,8 +12,6 @@ export const mongoSessionRun = async <T = unknown>(fn: (session: ClientSession)
return result as T;
} catch (error) {
console.log(error);
await session.abortTransaction();
await session.endSession();
return Promise.reject(error);


@@ -98,12 +98,15 @@ export const deleteDatasetDataVector = async (
return `${teamIdWhere} ${datasetIdWhere}`;
}
if ('idList' in props && props.idList) {
if ('idList' in props && Array.isArray(props.idList)) {
if (props.idList.length === 0) return;
return `${teamIdWhere} id IN (${props.idList.map((id) => `'${String(id)}'`).join(',')})`;
}
return Promise.reject('deleteDatasetData: no where');
})();
if (!where) return;
try {
await PgClient.delete(PgDatasetTableName, {
where: [where]


@@ -2,7 +2,7 @@ import { connectionMongo, type Model } from '../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { AppVersionSchemaType } from '@fastgpt/global/core/app/version';
export const AppVersionCollectionName = 'app.versions';
export const AppVersionCollectionName = 'app_versions';
const AppVersionSchema = new Schema({
appId: {


@@ -8,7 +8,7 @@ import {
TeamMemberCollectionName
} from '@fastgpt/global/support/user/team/constant';
export const DatasetColCollectionName = 'dataset.collections';
export const DatasetColCollectionName = 'dataset_collections';
const DatasetCollectionSchema = new Schema({
parentId: {


@@ -8,7 +8,7 @@ import {
import { DatasetCollectionName } from '../schema';
import { DatasetColCollectionName } from '../collection/schema';
export const DatasetDataCollectionName = 'dataset.datas';
export const DatasetDataCollectionName = 'dataset_datas';
const DatasetDataSchema = new Schema({
teamId: {
@@ -73,7 +73,8 @@ const DatasetDataSchema = new Schema({
},
inited: {
type: Boolean
}
},
rebuilding: Boolean
});
try {
@@ -90,10 +91,13 @@ try {
{ background: true }
);
DatasetDataSchema.index({ updateTime: 1 }, { background: true });
// rebuild data
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 }, { background: true });
} catch (error) {
console.log(error);
}
export const MongoDatasetData: Model<DatasetDataSchemaType> =
models[DatasetDataCollectionName] || model(DatasetDataCollectionName, DatasetDataSchema);
MongoDatasetData.syncIndexes();


@@ -10,7 +10,7 @@ import {
TeamMemberCollectionName
} from '@fastgpt/global/support/user/team/constant';
export const DatasetTrainingCollectionName = 'dataset.trainings';
export const DatasetTrainingCollectionName = 'dataset_trainings';
const TrainingDataSchema = new Schema({
teamId: {
@@ -35,8 +35,7 @@ const TrainingDataSchema = new Schema({
},
billId: {
// concat bill
type: String,
default: ''
type: Schema.Types.ObjectId
},
mode: {
type: String,
@@ -78,6 +77,9 @@ const TrainingDataSchema = new Schema({
type: Number,
default: 0
},
dataId: {
type: Schema.Types.ObjectId
},
indexes: {
type: [
{

packages/service/type/next.d.ts (new file)

@@ -0,0 +1,8 @@
import type { NextApiRequest, NextApiResponse } from 'next';
export type ApiRequestProps<Body = any, Query = any> = Omit<NextApiRequest, 'query' | 'body'> & {
query: Query;
body: Body;
};
export type { NextApiResponse as ApiResponseType } from 'next';
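These two helpers give API routes typed access to `req.body` and `req.query`. A minimal usage sketch (the `Greet*` names are illustrative, not from this commit):
```typescript
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';

export type GreetBody = { name: string };
export type GreetQuery = { lang?: string };
export type GreetResponse = { message: string };

async function handler(
  req: ApiRequestProps<GreetBody, GreetQuery>,
  res: ApiResponseType<any>
): Promise<GreetResponse> {
  // Both req.body and req.query are fully typed here; no casting needed
  return { message: `hello ${req.body.name} (${req.query.lang ?? 'en'})` };
}

export default NextAPI(handler);
```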


@@ -0,0 +1,9 @@
import React from 'react';
import { Divider, type DividerProps } from '@chakra-ui/react';
const MyDivider = (props: DividerProps) => {
const { h } = props;
return <Divider my={4} borderBottomWidth={h || '1px'} {...props}></Divider>;
};
export default MyDivider;


@@ -1,5 +1,5 @@
import React from 'react';
import { Tooltip, TooltipProps, css, useMediaQuery } from '@chakra-ui/react';
import { Box, Tooltip, TooltipProps, css, useMediaQuery } from '@chakra-ui/react';
interface Props extends TooltipProps {
forceShow?: boolean;
@@ -9,6 +9,13 @@ const MyTooltip = ({ children, forceShow = false, shouldWrapChildren = true, ...
const [isPc] = useMediaQuery('(min-width: 900px)');
return isPc || forceShow ? (
<Box
css={css({
'& span': {
display: 'block'
}
})}
>
<Tooltip
className="tooltip"
bg={'white'}
@@ -27,6 +34,7 @@ const MyTooltip = ({ children, forceShow = false, shouldWrapChildren = true, ...
>
{children}
</Tooltip>
</Box>
) : (
<>{children}</>
);


@@ -1,6 +1,12 @@
{
"parser": "@typescript-eslint/parser", // 确保使用了 TypeScript 解析器
"plugins": ["@typescript-eslint"], // 引入 TypeScript 插件
"extends": "next/core-web-vitals",
"rules": {
"react-hooks/rules-of-hooks": 0
"react-hooks/rules-of-hooks": 0,
"@typescript-eslint/consistent-type-imports": "warn" // 或者 "error" 来强制执行
}
}


@@ -0,0 +1,6 @@
{
"Confirm to rebuild embedding tip": "Are you sure to switch the knowledge base index? Switching index is a very heavy operation that requires re-indexing all the data in your knowledge base, which may take a long time. Please ensure that the remaining points in your account are sufficient.",
"Rebuild embedding start tip": "The task of switching index models has begun",
"Rebuilding index count": "Rebuilding count: {{count}}",
"The knowledge base has indexes that are being trained or being rebuilt": "The knowledge base has indexes that are being trained or being rebuilt"
}


@@ -0,0 +1,6 @@
{
"Confirm to rebuild embedding tip": "确认为知识库切换索引?\n切换索引是一个非常重量的操作需要对您知识库内所有数据进行重新索引时间可能较长请确保账号内剩余积分充足。",
"Rebuild embedding start tip": "切换索引模型任务已开始",
"Rebuilding index count": "重建中索引数量: {{count}}",
"The knowledge base has indexes that are being trained or being rebuilt": "知识库有训练中或正在重建的索引"
}


@@ -8,8 +8,13 @@ import MySelect, { SelectProps } from '@fastgpt/web/components/common/MySelect';
import { HUGGING_FACE_ICON, LOGO_ICON } from '@fastgpt/global/common/system/constants';
import { Box, Flex } from '@chakra-ui/react';
import Avatar from '../Avatar';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
const AIModelSelector = ({ list, onchange, ...props }: SelectProps) => {
type Props = SelectProps & {
disableTip?: string;
};
const AIModelSelector = ({ list, onchange, disableTip, ...props }: Props) => {
const { t } = useTranslation();
const { feConfigs, llmModelList, vectorModelList } = useSystemStore();
const router = useRouter();
@@ -62,9 +67,9 @@ const AIModelSelector = ({ list, onchange, ...props }: SelectProps) => {
);
return (
<>
<MySelect list={expandList} {...props} onchange={onSelect} />
</>
<MyTooltip label={disableTip}>
<MySelect isDisabled={!!disableTip} list={expandList} {...props} onchange={onSelect} />
</MyTooltip>
);
};


@@ -22,6 +22,11 @@ export type CreateDatasetParams = {
agentModel?: string;
};
export type RebuildEmbeddingProps = {
datasetId: string;
vectorModel: string;
};
/* ================= collection ===================== */
/* ================= data ===================== */


@@ -0,0 +1,178 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
import { NextAPI } from '@/service/middle/entry';
import { PgDatasetTableName } from '@fastgpt/global/common/vectorStore/constants';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { addLog } from '@fastgpt/service/common/system/log';
/* Rename the legacy dot-named Mongo collections to underscore-named ones */
async function handler(req: NextApiRequest, res: NextApiResponse) {
await authCert({ req, authRoot: true });
// rename dataset.trainings -> dataset_trainings
try {
const collections = await connectionMongo.connection.db
.listCollections({ name: 'dataset.trainings' })
.toArray();
if (collections.length > 0) {
const sourceCol = connectionMongo.connection.db.collection('dataset.trainings');
const targetCol = connectionMongo.connection.db.collection('dataset_trainings');
if ((await targetCol.countDocuments()) > 0) {
console.log(
'dataset_trainings already contains data; cannot automatically migrate dataset.trainings to dataset_trainings. Please migrate manually.'
);
} else {
await sourceCol.rename('dataset_trainings', { dropTarget: true });
console.log('success rename dataset.trainings -> dataset_trainings');
}
}
} catch (error) {
console.log('error rename dataset.trainings -> dataset_trainings', error);
}
try {
const collections = await connectionMongo.connection.db
.listCollections({ name: 'dataset.collections' })
.toArray();
if (collections.length > 0) {
const sourceCol = connectionMongo.connection.db.collection('dataset.collections');
const targetCol = connectionMongo.connection.db.collection('dataset_collections');
if ((await targetCol.countDocuments()) > 0) {
console.log(
'dataset_collections already contains data; cannot automatically migrate dataset.collections to dataset_collections. Please migrate manually.'
);
} else {
await sourceCol.rename('dataset_collections', { dropTarget: true });
console.log('success rename dataset.collections -> dataset_collections');
}
}
} catch (error) {
console.log('error rename dataset.collections -> dataset_collections', error);
}
try {
const collections = await connectionMongo.connection.db
.listCollections({ name: 'dataset.datas' })
.toArray();
if (collections.length > 0) {
const sourceCol = connectionMongo.connection.db.collection('dataset.datas');
const targetCol = connectionMongo.connection.db.collection('dataset_datas');
if ((await targetCol.countDocuments()) > 0) {
console.log(
'dataset_datas already contains data; cannot automatically migrate dataset.datas to dataset_datas. Please migrate manually.'
);
} else {
await sourceCol.rename('dataset_datas', { dropTarget: true });
console.log('success rename dataset.datas -> dataset_datas');
}
}
} catch (error) {
console.log('error rename dataset.datas -> dataset_datas', error);
}
try {
const collections = await connectionMongo.connection.db
.listCollections({ name: 'app.versions' })
.toArray();
if (collections.length > 0) {
const sourceCol = connectionMongo.connection.db.collection('app.versions');
const targetCol = connectionMongo.connection.db.collection('app_versions');
if ((await targetCol.countDocuments()) > 0) {
console.log(
'app_versions already contains data; cannot automatically migrate app.versions to app_versions. Please migrate manually.'
);
} else {
await sourceCol.rename('app_versions', { dropTarget: true });
console.log('success rename app.versions -> app_versions');
}
}
} catch (error) {
console.log('error rename app.versions -> app_versions', error);
}
try {
const collections = await connectionMongo.connection.db
.listCollections({ name: 'buffer.rawtexts' })
.toArray();
if (collections.length > 0) {
const sourceCol = connectionMongo.connection.db.collection('buffer.rawtexts');
const targetCol = connectionMongo.connection.db.collection('buffer_rawtexts');
if ((await targetCol.countDocuments()) > 0) {
console.log(
'buffer_rawtexts already contains data; cannot automatically migrate buffer.rawtexts to buffer_rawtexts. Please migrate manually.'
);
} else {
await sourceCol.rename('buffer_rawtexts', { dropTarget: true });
console.log('success rename buffer.rawtexts -> buffer_rawtexts');
}
}
} catch (error) {
console.log('error rename buffer.rawtexts -> buffer_rawtexts', error);
}
try {
const collections = await connectionMongo.connection.db
.listCollections({ name: 'buffer.tts' })
.toArray();
if (collections.length > 0) {
const sourceCol = connectionMongo.connection.db.collection('buffer.tts');
const targetCol = connectionMongo.connection.db.collection('buffer_tts');
if ((await targetCol.countDocuments()) > 0) {
console.log('buffer_tts already contains data; cannot automatically migrate buffer.tts to buffer_tts. Please migrate manually.');
} else {
await sourceCol.rename('buffer_tts', { dropTarget: true });
console.log('success rename buffer.tts -> buffer_tts');
}
}
} catch (error) {
console.log('error rename buffer.tts -> buffer_tts', error);
}
try {
const collections = await connectionMongo.connection.db
.listCollections({ name: 'team.members' })
.toArray();
if (collections.length > 0) {
const sourceCol = connectionMongo.connection.db.collection('team.members');
const targetCol = connectionMongo.connection.db.collection('team_members');
if ((await targetCol.countDocuments()) > 0) {
console.log('team_members already contains data; cannot automatically migrate team.members to team_members. Please migrate manually.');
} else {
await sourceCol.rename('team_members', { dropTarget: true });
console.log('success rename team.members -> team_members');
}
}
} catch (error) {
console.log('error rename team.members -> team_members', error);
}
try {
const collections = await connectionMongo.connection.db
.listCollections({ name: 'team.tags' })
.toArray();
if (collections.length > 0) {
const sourceCol = connectionMongo.connection.db.collection('team.tags');
const targetCol = connectionMongo.connection.db.collection('team_tags');
if ((await targetCol.countDocuments()) > 0) {
console.log('team_tags already contains data; cannot automatically migrate team.tags to team_tags. Please migrate manually.');
} else {
await sourceCol.rename('team_tags', { dropTarget: true });
console.log('success rename team.tags -> team_tags');
}
}
} catch (error) {
console.log('error rename team.tags -> team_tags', error);
}
jsonRes(res, {
message: 'success'
});
}
export default NextAPI(handler);
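Each rename block above repeats the same guard-then-rename pattern, so the migration could equally be written once as a helper. A behavior-preserving sketch (not part of the commit):
```typescript
import { connectionMongo } from '@fastgpt/service/common/mongo';

// Rename a legacy dot-named collection to its underscore-named successor,
// refusing to touch anything if the target already holds data.
async function renameCollection(oldName: string, newName: string) {
  const db = connectionMongo.connection.db;
  try {
    const exists = await db.listCollections({ name: oldName }).toArray();
    if (exists.length === 0) return;

    if ((await db.collection(newName).countDocuments()) > 0) {
      console.log(`${newName} already contains data; migrate ${oldName} manually.`);
      return;
    }
    await db.collection(oldName).rename(newName, { dropTarget: true });
    console.log(`success rename ${oldName} -> ${newName}`);
  } catch (error) {
    console.log(`error rename ${oldName} -> ${newName}`, error);
  }
}

// Usage: await renameCollection('dataset.trainings', 'dataset_trainings'); etc.
```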


@@ -0,0 +1,39 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
type Props = {};
export type getDatasetTrainingQueueResponse = {
rebuildingCount: number;
trainingCount: number;
};
async function handler(
req: ApiRequestProps<any, { datasetId: string }>,
res: ApiResponseType<any>
): Promise<getDatasetTrainingQueueResponse> {
const { datasetId } = req.query;
const { teamId } = await authDataset({
req,
authToken: true,
authApiKey: true,
datasetId,
per: 'r'
});
const [rebuildingCount, trainingCount] = await Promise.all([
MongoDatasetData.countDocuments({ teamId, datasetId, rebuilding: true }),
MongoDatasetTraining.countDocuments({ teamId, datasetId })
]);
return {
rebuildingCount,
trainingCount
};
}
export default NextAPI(handler);


@@ -0,0 +1,133 @@
import { NextAPI } from '@/service/middle/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
export type rebuildEmbeddingBody = {
datasetId: string;
vectorModel: string;
};
export type Response = {};
async function handler(
req: ApiRequestProps<rebuildEmbeddingBody>,
res: ApiResponseType<any>
): Promise<Response> {
const { datasetId, vectorModel } = req.body;
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
datasetId,
per: 'owner'
});
// check vector model
if (!vectorModel || dataset.vectorModel === vectorModel) {
return Promise.reject('Invalid vectorModel');
}
// check rebuilding or training
const [rebuilding, training] = await Promise.all([
MongoDatasetData.findOne({ teamId, datasetId, rebuilding: true }),
MongoDatasetTraining.findOne({ teamId, datasetId })
]);
if (rebuilding || training) {
return Promise.reject('The dataset is being trained or rebuilt. Please try again later.');
}
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: 'Switch index model',
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name
});
// update vector model and dataset.data rebuild field
await mongoSessionRun(async (session) => {
await MongoDataset.findByIdAndUpdate(
datasetId,
{
vectorModel
},
{ session }
);
await MongoDatasetData.updateMany(
{
teamId,
datasetId
},
{
$set: {
rebuilding: true
}
},
{
session
}
);
});
// get 10 init dataset.data
const arr = new Array(10).fill(0);
for await (const _ of arr) {
await mongoSessionRun(async (session) => {
const data = await MongoDatasetData.findOneAndUpdate(
{
teamId,
datasetId,
rebuilding: true
},
{
$unset: {
rebuilding: null
},
updateTime: new Date()
},
{
session
}
).select({
_id: 1,
collectionId: 1
});
if (data) {
await MongoDatasetTraining.create(
[
{
teamId,
tmbId,
datasetId,
collectionId: data.collectionId,
billId,
mode: TrainingModeEnum.chunk,
model: vectorModel,
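// placeholder value: rebuild jobs read their real content via dataId rather than q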
q: '1',
dataId: data._id
}
],
{
session
}
);
}
});
}
return {};
}
export default NextAPI(handler);


@@ -16,25 +16,47 @@ import PermissionRadio from '@/components/support/permission/Radio';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
import MySelect from '@fastgpt/web/components/common/MySelect';
import AIModelSelector from '@/components/Select/AIModelSelector';
import { postRebuildEmbedding } from '@/web/core/dataset/api';
import { useI18n } from '@/web/context/I18n';
import type { VectorModelItemType } from '@fastgpt/global/core/ai/model.d';
import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import MyDivider from '@fastgpt/web/components/common/MyDivider/index';
const Info = ({ datasetId }: { datasetId: string }) => {
const { t } = useTranslation();
const { datasetDetail, loadDatasets, updateDataset } = useDatasetStore();
const { getValues, setValue, register, handleSubmit } = useForm<DatasetItemType>({
const { datasetT } = useI18n();
const { datasetDetail, loadDatasetDetail, loadDatasets, updateDataset } = useDatasetStore();
const rebuildingCount = useContextSelector(DatasetPageContext, (v) => v.rebuildingCount);
const trainingCount = useContextSelector(DatasetPageContext, (v) => v.trainingCount);
const refetchDatasetTraining = useContextSelector(
DatasetPageContext,
(v) => v.refetchDatasetTraining
);
const { setValue, register, handleSubmit, watch } = useForm<DatasetItemType>({
defaultValues: datasetDetail
});
const avatar = watch('avatar');
const vectorModel = watch('vectorModel');
const agentModel = watch('agentModel');
const permission = watch('permission');
const { datasetModelList, vectorModelList } = useSystemStore();
const router = useRouter();
const [refresh, setRefresh] = useState(false);
const { openConfirm, ConfirmModal } = useConfirm({
const { openConfirm: onOpenConfirmDel, ConfirmModal: ConfirmDelModal } = useConfirm({
content: t('core.dataset.Delete Confirm'),
type: 'delete'
});
const { openConfirm: onOpenConfirmRebuild, ConfirmModal: ConfirmRebuildModal } = useConfirm({
title: t('common.confirm.Common Tip'),
content: datasetT('Confirm to rebuild embedding tip'),
type: 'delete'
});
const { File, onOpen: onOpenSelectFile } = useSelectFile({
fileType: '.jpg,.png',
@@ -81,13 +103,27 @@ const Info = ({ datasetId }: { datasetId: string }) => {
onSuccess(src: string | null) {
if (src) {
setValue('avatar', src);
setRefresh((state) => !state);
}
},
errorToast: t('common.avatar.Select Failed')
});
const btnLoading = useMemo(() => isDeleting || isSaving, [isDeleting, isSaving]);
const { mutate: onRebuilding, isLoading: isRebuilding } = useRequest({
mutationFn: (vectorModel: VectorModelItemType) => {
return postRebuildEmbedding({
datasetId,
vectorModel: vectorModel.model
});
},
onSuccess() {
refetchDatasetTraining();
loadDatasetDetail(datasetId, true);
},
successToast: datasetT('Rebuild embedding start tip'),
errorToast: t('common.Update Failed')
});
const btnLoading = isSelecting || isDeleting || isSaving || isRebuilding;
return (
<Box py={5} px={[5, 10]}>
@@ -97,6 +133,62 @@ const Info = ({ datasetId }: { datasetId: string }) => {
</Box>
<Box flex={1}>{datasetDetail._id}</Box>
</Flex>
<Flex mt={8} w={'100%'} alignItems={'center'} flexWrap={'wrap'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
{t('core.ai.model.Vector Model')}
</Box>
<Box flex={[1, '0 0 300px']}>
<AIModelSelector
w={'100%'}
value={vectorModel.model}
disableTip={
rebuildingCount > 0 || trainingCount > 0
? datasetT('The knowledge base has indexes that are being trained or being rebuilt')
: undefined
}
list={vectorModelList.map((item) => ({
label: item.name,
value: item.model
}))}
onchange={(e) => {
const vectorModel = vectorModelList.find((item) => item.model === e);
if (!vectorModel) return;
onOpenConfirmRebuild(() => {
setValue('vectorModel', vectorModel);
onRebuilding(vectorModel);
})();
}}
/>
</Box>
</Flex>
<Flex mt={8} w={'100%'} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
{t('core.Max Token')}
</Box>
<Box flex={[1, '0 0 300px']}>{vectorModel.maxToken}</Box>
</Flex>
<Flex mt={6} alignItems={'center'} flexWrap={'wrap'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
{t('core.ai.model.Dataset Agent Model')}
</Box>
<Box flex={[1, '0 0 300px']}>
<AIModelSelector
w={'100%'}
value={agentModel.model}
list={datasetModelList.map((item) => ({
label: item.name,
value: item.model
}))}
onchange={(e) => {
const agentModel = datasetModelList.find((item) => item.model === e);
if (!agentModel) return;
setValue('agentModel', agentModel);
}}
/>
</Box>
</Flex>
<MyDivider my={4} h={'2px'} maxW={'500px'} />
<Flex mt={5} w={'100%'} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
@@ -106,7 +198,7 @@ const Info = ({ datasetId }: { datasetId: string }) => {
<MyTooltip label={t('common.avatar.Select Avatar')}>
<Avatar
m={'auto'}
src={getValues('avatar')}
src={avatar}
w={['32px', '40px']}
h={['32px', '40px']}
cursor={'pointer'}
@@ -121,40 +213,6 @@ const Info = ({ datasetId }: { datasetId: string }) => {
</Box>
<Input flex={[1, '0 0 300px']} maxLength={30} {...register('name')} />
</Flex>
<Flex mt={8} w={'100%'} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
{t('core.ai.model.Vector Model')}
</Box>
<Box flex={[1, '0 0 300px']}>{getValues('vectorModel').name}</Box>
</Flex>
<Flex mt={8} w={'100%'} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
{t('core.Max Token')}
</Box>
<Box flex={[1, '0 0 300px']}>{getValues('vectorModel').maxToken}</Box>
</Flex>
<Flex mt={6} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
{t('core.ai.model.Dataset Agent Model')}
</Box>
<Box flex={[1, '0 0 300px']}>
<AIModelSelector
w={'100%'}
value={getValues('agentModel').model}
list={datasetModelList.map((item) => ({
label: item.name,
value: item.model
}))}
onchange={(e) => {
const agentModel = datasetModelList.find((item) => item.model === e);
if (!agentModel) return;
setValue('agentModel', agentModel);
setRefresh((state) => !state);
}}
/>
</Box>
</Flex>
<Flex mt={8} alignItems={'center'} w={'100%'}>
<Box flex={['0 0 90px', '0 0 160px']}>{t('common.Intro')}</Box>
<Textarea flex={[1, '0 0 300px']} {...register('intro')} placeholder={t('common.Intro')} />
@@ -166,10 +224,9 @@ const Info = ({ datasetId }: { datasetId: string }) => {
</Box>
<Box>
<PermissionRadio
value={getValues('permission')}
value={permission}
onChange={(e) => {
setValue('permission', e);
setRefresh(!refresh);
}}
/>
</Box>
@@ -193,12 +250,14 @@ const Info = ({ datasetId }: { datasetId: string }) => {
aria-label={''}
variant={'whiteDanger'}
size={'smSquare'}
onClick={openConfirm(onclickDelete)}
onClick={onOpenConfirmDel(onclickDelete)}
/>
)}
</Flex>
<File onSelect={onSelectFile} />
<ConfirmModal />
<ConfirmDelModal />
<ConfirmRebuildModal countDown={10} />
</Box>
);
};


@@ -0,0 +1,215 @@
import React, { useCallback } from 'react';
import { useTranslation } from 'next-i18next';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useUserStore } from '@/web/support/user/useUserStore';
import { Box, Flex, IconButton, useTheme, Progress } from '@chakra-ui/react';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import Avatar from '@/components/Avatar';
import {
DatasetStatusEnum,
DatasetTypeEnum,
DatasetTypeMap
} from '@fastgpt/global/core/dataset/constants';
import DatasetTypeTag from '@/components/core/dataset/DatasetTypeTag';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import SideTabs from '@/components/SideTabs';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useRouter } from 'next/router';
import Tabs from '@/components/Tabs';
import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { useI18n } from '@/web/context/I18n';
export enum TabEnum {
dataCard = 'dataCard',
collectionCard = 'collectionCard',
test = 'test',
info = 'info',
import = 'import'
}
const Slider = ({ currentTab }: { currentTab: TabEnum }) => {
const theme = useTheme();
const { t } = useTranslation();
const { datasetT } = useI18n();
const router = useRouter();
const query = router.query;
const { datasetDetail, startWebsiteSync } = useDatasetStore();
const { userInfo } = useUserStore();
const { isPc, setLoading } = useSystemStore();
const vectorTrainingMap = useContextSelector(DatasetPageContext, (v) => v.vectorTrainingMap);
const agentTrainingMap = useContextSelector(DatasetPageContext, (v) => v.agentTrainingMap);
const rebuildingCount = useContextSelector(DatasetPageContext, (v) => v.rebuildingCount);
const tabList = [
{
label: t('core.dataset.Collection'),
id: TabEnum.collectionCard,
icon: 'common/overviewLight'
},
{ label: t('core.dataset.test.Search Test'), id: TabEnum.test, icon: 'kbTest' },
...(userInfo?.team.canWrite && datasetDetail.isOwner
? [{ label: t('common.Config'), id: TabEnum.info, icon: 'common/settingLight' }]
: [])
];
const setCurrentTab = useCallback(
(tab: TabEnum) => {
router.replace({
query: {
...query,
currentTab: tab
}
});
},
[query, router]
);
const { ConfirmModal: ConfirmSyncModal, openConfirm: openConfirmSync } = useConfirm({
type: 'common'
});
const { mutate: onUpdateDatasetWebsiteConfig } = useRequest({
mutationFn: () => {
setLoading(true);
return startWebsiteSync();
},
onSettled() {
setLoading(false);
},
errorToast: t('common.Update Failed')
});
return (
<>
{isPc ? (
<Flex
flexDirection={'column'}
py={4}
h={'100%'}
flex={'0 0 200px'}
borderRight={theme.borders.base}
>
<Box px={4} borderBottom={'1px'} borderColor={'myGray.200'} pb={4} mb={4}>
<Flex mb={4} alignItems={'center'}>
<Avatar src={datasetDetail.avatar} w={'34px'} borderRadius={'md'} />
<Box ml={2}>
<Box fontWeight={'bold'}>{datasetDetail.name}</Box>
</Box>
</Flex>
{DatasetTypeMap[datasetDetail.type] && (
<Flex alignItems={'center'} pl={2} justifyContent={'space-between'}>
<DatasetTypeTag type={datasetDetail.type} />
{datasetDetail.type === DatasetTypeEnum.websiteDataset &&
datasetDetail.status === DatasetStatusEnum.active && (
<MyTooltip label={t('core.dataset.website.Start Sync')}>
<MyIcon
mt={1}
name={'common/refreshLight'}
w={'12px'}
color={'myGray.500'}
cursor={'pointer'}
onClick={() =>
openConfirmSync(
onUpdateDatasetWebsiteConfig,
undefined,
t('core.dataset.website.Confirm Create Tips')
)()
}
/>
</MyTooltip>
)}
</Flex>
)}
</Box>
<SideTabs
px={4}
flex={1}
mx={'auto'}
w={'100%'}
list={tabList}
activeId={currentTab}
onChange={(e: any) => {
setCurrentTab(e);
}}
/>
<Box px={4}>
{rebuildingCount > 0 && (
<Box mb={3}>
<Box fontSize={'sm'}>
{datasetT('Rebuilding index count', { count: rebuildingCount })}
</Box>
</Box>
)}
<Box mb={3}>
<Box fontSize={'sm'}>
{t('core.dataset.training.Agent queue')}({agentTrainingMap.tip})
</Box>
<Progress
value={100}
size={'xs'}
colorScheme={agentTrainingMap.colorSchema}
borderRadius={'10px'}
isAnimated
hasStripe
/>
</Box>
<Box mb={3}>
<Box fontSize={'sm'}>
{t('core.dataset.training.Vector queue')}({vectorTrainingMap.tip})
</Box>
<Progress
value={100}
size={'xs'}
colorScheme={vectorTrainingMap.colorSchema}
borderRadius={'10px'}
isAnimated
hasStripe
/>
</Box>
</Box>
<Flex
alignItems={'center'}
cursor={'pointer'}
py={2}
px={3}
borderRadius={'md'}
_hover={{ bg: 'myGray.100' }}
onClick={() => router.replace('/dataset/list')}
>
<IconButton
mr={3}
icon={<MyIcon name={'common/backFill'} w={'18px'} color={'primary.500'} />}
bg={'white'}
boxShadow={'1px 1px 9px rgba(0,0,0,0.15)'}
size={'smSquare'}
borderRadius={'50%'}
aria-label={''}
/>
{t('core.dataset.All Dataset')}
</Flex>
</Flex>
) : (
<Box mb={3}>
<Tabs
m={'auto'}
w={'260px'}
size={isPc ? 'md' : 'sm'}
list={tabList.map((item) => ({
id: item.id,
label: item.label
}))}
activeId={currentTab}
onChange={(e: any) => setCurrentTab(e)}
/>
</Box>
)}
<ConfirmSyncModal />
</>
);
};
export default Slider;


@@ -1,33 +1,22 @@
import React, { useCallback, useMemo } from 'react';
import { useRouter } from 'next/router';
import { Box, Flex, IconButton, useTheme, Progress } from '@chakra-ui/react';
import { Box } from '@chakra-ui/react';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useQuery } from '@tanstack/react-query';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import Tabs from '@/components/Tabs';
import dynamic from 'next/dynamic';
import MyIcon from '@fastgpt/web/components/common/Icon';
import SideTabs from '@/components/SideTabs';
import PageContainer from '@/components/PageContainer';
import Avatar from '@/components/Avatar';
import { serviceSideProps } from '@/web/common/utils/i18n';
import { useTranslation } from 'next-i18next';
import { getTrainingQueueLen } from '@/web/core/dataset/api';
import MyTooltip from '@/components/MyTooltip';
import CollectionCard from './components/CollectionCard';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useUserStore } from '@/web/support/user/useUserStore';
import {
DatasetStatusEnum,
DatasetTypeEnum,
DatasetTypeMap
} from '@fastgpt/global/core/dataset/constants';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import DatasetTypeTag from '@/components/core/dataset/DatasetTypeTag';
import Head from 'next/head';
import Slider from './components/Slider';
import MyBox from '@fastgpt/web/components/common/MyBox';
import { DatasetPageContextProvider } from '@/web/core/dataset/context/datasetPageContext';
const DataCard = dynamic(() => import('./components/DataCard'));
const Test = dynamic(() => import('./components/Test'));
@@ -42,48 +31,16 @@ export enum TabEnum {
import = 'import'
}
const Detail = ({ datasetId, currentTab }: { datasetId: string; currentTab: `${TabEnum}` }) => {
const theme = useTheme();
const Detail = ({ datasetId, currentTab }: { datasetId: string; currentTab: TabEnum }) => {
const { t } = useTranslation();
const { toast } = useToast();
const router = useRouter();
const { isPc } = useSystemStore();
const { datasetDetail, loadDatasetDetail, startWebsiteSync } = useDatasetStore();
const { userInfo } = useUserStore();
const tabList = [
{
label: t('core.dataset.Collection'),
id: TabEnum.collectionCard,
icon: 'common/overviewLight'
},
{ label: t('core.dataset.test.Search Test'), id: TabEnum.test, icon: 'kbTest' },
...(userInfo?.team.canWrite && datasetDetail.isOwner
? [{ label: t('common.Config'), id: TabEnum.info, icon: 'common/settingLight' }]
: [])
];
const { datasetDetail, loadDatasetDetail } = useDatasetStore();
const { ConfirmModal: ConfirmSyncModal, openConfirm: openConfirmSync } = useConfirm({
type: 'common'
});
const { mutate: onUpdateDatasetWebsiteConfig, isLoading: isUpdating } = useRequest({
mutationFn: () => startWebsiteSync(),
errorToast: t('common.Update Failed')
});
const setCurrentTab = useCallback(
(tab: `${TabEnum}`) => {
router.replace({
query: {
datasetId,
currentTab: tab
}
});
},
[datasetId, router]
);
useQuery([datasetId], () => loadDatasetDetail(datasetId), {
onError(err: any) {
router.replace(`/dataset/list`);
@@ -94,185 +51,19 @@ const Detail = ({ datasetId, currentTab }: { datasetId: string; currentTab: `${T
}
});
const { data: { vectorTrainingCount = 0, agentTrainingCount = 0 } = {} } = useQuery(
['getTrainingQueueLen'],
() =>
getTrainingQueueLen({
vectorModel: datasetDetail.vectorModel.model,
agentModel: datasetDetail.agentModel.model
}),
{
refetchInterval: 10000
}
);
const { vectorTrainingMap, agentTrainingMap } = useMemo(() => {
const vectorTrainingMap = (() => {
if (vectorTrainingCount < 1000)
return {
colorSchema: 'green',
tip: t('core.dataset.training.Leisure')
};
if (vectorTrainingCount < 10000)
return {
colorSchema: 'yellow',
tip: t('core.dataset.training.Waiting')
};
return {
colorSchema: 'red',
tip: t('core.dataset.training.Full')
};
})();
const agentTrainingMap = (() => {
if (agentTrainingCount < 100)
return {
colorSchema: 'green',
tip: t('core.dataset.training.Leisure')
};
if (agentTrainingCount < 1000)
return {
colorSchema: 'yellow',
tip: t('core.dataset.training.Waiting')
};
return {
colorSchema: 'red',
tip: t('core.dataset.training.Full')
};
})();
return {
vectorTrainingMap,
agentTrainingMap
};
}, [agentTrainingCount, t, vectorTrainingCount]);
return (
<>
<Head>
<title>{datasetDetail?.name}</title>
</Head>
<PageContainer>
<MyBox
isLoading={isUpdating}
display={'flex'}
flexDirection={['column', 'row']}
h={'100%'}
pt={[4, 0]}
>
{isPc ? (
<Flex
flexDirection={'column'}
py={4}
h={'100%'}
flex={'0 0 200px'}
borderRight={theme.borders.base}
>
<Box px={4} borderBottom={'1px'} borderColor={'myGray.200'} pb={4} mb={4}>
<Flex mb={4} alignItems={'center'}>
<Avatar src={datasetDetail.avatar} w={'34px'} borderRadius={'md'} />
<Box ml={2}>
<Box fontWeight={'bold'}>{datasetDetail.name}</Box>
</Box>
</Flex>
{DatasetTypeMap[datasetDetail.type] && (
<Flex alignItems={'center'} pl={2} justifyContent={'space-between'}>
<DatasetTypeTag type={datasetDetail.type} />
{datasetDetail.type === DatasetTypeEnum.websiteDataset &&
datasetDetail.status === DatasetStatusEnum.active && (
<MyTooltip label={t('core.dataset.website.Start Sync')}>
<MyIcon
mt={1}
name={'common/refreshLight'}
w={'12px'}
color={'myGray.500'}
cursor={'pointer'}
onClick={() =>
openConfirmSync(
onUpdateDatasetWebsiteConfig,
undefined,
t('core.dataset.website.Confirm Create Tips')
)()
}
/>
</MyTooltip>
)}
</Flex>
)}
</Box>
<SideTabs
px={4}
flex={1}
mx={'auto'}
w={'100%'}
list={tabList}
activeId={currentTab}
onChange={(e: any) => {
setCurrentTab(e);
<DatasetPageContextProvider
value={{
datasetId
}}
/>
<Box px={4}>
<Box mb={3}>
<Box fontSize={'sm'}>
{t('core.dataset.training.Agent queue')}({agentTrainingMap.tip})
</Box>
<Progress
value={100}
size={'xs'}
colorScheme={agentTrainingMap.colorSchema}
borderRadius={'10px'}
isAnimated
hasStripe
/>
</Box>
<Box mb={3}>
<Box fontSize={'sm'}>
{t('core.dataset.training.Vector queue')}({vectorTrainingMap.tip})
</Box>
<Progress
value={100}
size={'xs'}
colorScheme={vectorTrainingMap.colorSchema}
borderRadius={'10px'}
isAnimated
hasStripe
/>
</Box>
</Box>
<Flex
alignItems={'center'}
cursor={'pointer'}
py={2}
px={3}
borderRadius={'md'}
_hover={{ bg: 'myGray.100' }}
onClick={() => router.replace('/dataset/list')}
>
<IconButton
mr={3}
icon={<MyIcon name={'common/backFill'} w={'18px'} color={'primary.500'} />}
bg={'white'}
boxShadow={'1px 1px 9px rgba(0,0,0,0.15)'}
size={'smSquare'}
borderRadius={'50%'}
aria-label={''}
/>
{t('core.dataset.All Dataset')}
</Flex>
</Flex>
) : (
<Box mb={3}>
<Tabs
m={'auto'}
w={'260px'}
size={isPc ? 'md' : 'sm'}
list={tabList.map((item) => ({
id: item.id,
label: item.label
}))}
activeId={currentTab}
onChange={(e: any) => setCurrentTab(e)}
/>
</Box>
)}
<PageContainer>
<MyBox display={'flex'} flexDirection={['column', 'row']} h={'100%'} pt={[4, 0]}>
<Slider currentTab={currentTab} />
{!!datasetDetail._id && (
<Box flex={'1 0 0'} pb={0}>
@@ -285,6 +76,8 @@ const Detail = ({ datasetId, currentTab }: { datasetId: string; currentTab: `${T
)}
</MyBox>
</PageContainer>
</DatasetPageContextProvider>
<ConfirmSyncModal />
</>
);
@@ -295,7 +88,7 @@ export async function getServerSideProps(context: any) {
const datasetId = context?.query?.datasetId;
return {
props: { currentTab, datasetId, ...(await serviceSideProps(context, ['file'])) }
props: { currentTab, datasetId, ...(await serviceSideProps(context, ['dataset', 'file'])) }
};
}


@@ -305,6 +305,42 @@ const Kb = () => {
</Box>
}
menuList={[
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'edit'} w={'14px'} mr={2} />
{t('Rename')}
</Flex>
),
onClick: () =>
onOpenTitleModal({
defaultVal: dataset.name,
onSuccess: (val) => {
if (val === dataset.name || !val) return;
updateDataset({ id: dataset._id, name: val });
}
})
},
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'common/file/move'} w={'14px'} mr={2} />
{t('Move')}
</Flex>
),
onClick: () => setMoveDataId(dataset._id)
},
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'export'} w={'14px'} mr={2} />
{t('Export')}
</Flex>
),
onClick: () => {
exportDataset(dataset);
}
},
...(dataset.permission === PermissionTypeEnum.private
? [
{
@@ -342,42 +378,6 @@ const Kb = () => {
}
}
]),
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'edit'} w={'14px'} mr={2} />
{t('Rename')}
</Flex>
),
onClick: () =>
onOpenTitleModal({
defaultVal: dataset.name,
onSuccess: (val) => {
if (val === dataset.name || !val) return;
updateDataset({ id: dataset._id, name: val });
}
})
},
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'common/file/move'} w={'14px'} mr={2} />
{t('Move')}
</Flex>
),
onClick: () => setMoveDataId(dataset._id)
},
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'export'} w={'14px'} mr={2} />
{t('Export')}
</Flex>
),
onClick: () => {
exportDataset(dataset);
}
},
{
label: (
<Flex alignItems={'center'}>


@@ -28,7 +28,7 @@ const clearInvalidDataCron = () => {
lockMinuted: 59
})
) {
checkInvalidDatasetFiles(addHours(new Date(), 2), addHours(new Date(), 6));
checkInvalidDatasetFiles(addHours(new Date(), -6), addHours(new Date(), -2));
}
});
@@ -39,7 +39,7 @@ const clearInvalidDataCron = () => {
lockMinuted: 59
})
) {
checkInvalidDatasetData(addHours(new Date(), 2), addHours(new Date(), 6));
checkInvalidDatasetData(addHours(new Date(), -6), addHours(new Date(), -2));
}
});
@@ -50,7 +50,7 @@ const clearInvalidDataCron = () => {
lockMinuted: 59
})
) {
checkInvalidVector(addHours(new Date(), 2), addHours(new Date(), 6));
checkInvalidVector(addHours(new Date(), -6), addHours(new Date(), -2));
}
});
};
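The fix flips the scan window from hours in the future, which presumably matched nothing, to the recent past. A tiny sketch of the corrected window, using date-fns as the cron file does:
```typescript
import { addHours } from 'date-fns';

const now = new Date();
// Corrected window: records created between 6 hours ago and 2 hours ago
const start = addHours(now, -6);
const end = addHours(now, -2);
console.log(`scanning ${start.toISOString()} -> ${end.toISOString()}`);
```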


@@ -11,6 +11,7 @@ import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/con
import { DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
import { getVectorModel } from '@fastgpt/service/core/ai/model';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { ClientSession } from '@fastgpt/service/common/mongo';
/* insert data.
* 1. create data id
@@ -26,9 +27,11 @@ export async function insertData2Dataset({
a = '',
chunkIndex = 0,
indexes,
model
model,
session
}: CreateDatasetDataProps & {
model: string;
session?: ClientSession;
}) {
if (!q || !datasetId || !collectionId || !model) {
console.log(q, a, datasetId, collectionId, model);
@@ -70,7 +73,9 @@ export async function insertData2Dataset({
);
// create mongo data
const { _id } = await MongoDatasetData.create({
const [{ _id }] = await MongoDatasetData.create(
[
{
teamId,
tmbId,
datasetId,
@@ -83,7 +88,10 @@ export async function insertData2Dataset({
...item,
dataId: result[i].insertId
}))
});
}
],
{ session }
);
return {
insertId: _id,


@@ -46,7 +46,6 @@ export async function generateQA(): Promise<any> {
)
.select({
_id: 1,
userId: 1,
teamId: 1,
tmbId: 1,
datasetId: 1,


@@ -6,6 +6,15 @@ import { checkTeamAiPointsAndLock } from './utils';
import { checkInvalidChunkAndLock } from '@fastgpt/service/core/dataset/training/utils';
import { addMinutes } from 'date-fns';
import { addLog } from '@fastgpt/service/common/system/log';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import {
deleteDatasetDataVector,
insertDatasetDataVector
} from '@fastgpt/service/common/vectorStore/controller';
import { getVectorModel } from '@fastgpt/service/core/ai/model';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import { Document } from '@fastgpt/service/common/mongo';
const reduceQueue = () => {
global.vectorQueueLen = global.vectorQueueLen > 0 ? global.vectorQueueLen - 1 : 0;
@@ -23,7 +32,6 @@ export async function generateVector(): Promise<any> {
// get training data
const {
data,
dataItem,
done = false,
error = false
} = await (async () => {
@@ -38,7 +46,6 @@ export async function generateVector(): Promise<any> {
}
).select({
_id: 1,
userId: 1,
teamId: 1,
tmbId: 1,
datasetId: 1,
@@ -46,6 +53,7 @@ export async function generateVector(): Promise<any> {
q: 1,
a: 1,
chunkIndex: 1,
dataId: 1,
indexes: 1,
model: 1,
billId: 1
@@ -58,12 +66,7 @@ export async function generateVector(): Promise<any> {
};
}
return {
data,
dataItem: {
q: data.q,
a: data.a || '',
indexes: data.indexes
}
data
};
} catch (error) {
addLog.error(`Get Training Data error`, error);
@@ -93,28 +96,14 @@ export async function generateVector(): Promise<any> {
addLog.info(`[Vector Queue] Start`);
// create vector and insert
try {
// invalid data
if (!data.q.trim()) {
await data.deleteOne();
reduceQueue();
generateVector();
return;
const { tokens } = await (async () => {
if (data.dataId) {
return rebuildData({ trainingData: data });
} else {
return insertData({ trainingData: data });
}
// insert to dataset
const { tokens } = await insertData2Dataset({
teamId: data.teamId,
tmbId: data.tmbId,
datasetId: data.datasetId,
collectionId: data.collectionId,
q: dataItem.q,
a: dataItem.a,
chunkIndex: data.chunkIndex,
indexes: dataItem.indexes,
model: data.model
});
})();
// push usage
pushGenerateVectorUsage({
@@ -125,14 +114,12 @@ export async function generateVector(): Promise<any> {
billId: data.billId
});
// delete data from training
await data.deleteOne();
reduceQueue();
generateVector();
addLog.info(`[Vector Queue] Finish`, {
time: Date.now() - start
});
reduceQueue();
generateVector();
} catch (err: any) {
reduceQueue();
@@ -145,3 +132,152 @@ export async function generateVector(): Promise<any> {
}, 1000);
}
}
const rebuildData = async ({
trainingData
}: {
trainingData: Document<unknown, {}, DatasetTrainingSchemaType> &
Omit<
DatasetTrainingSchemaType &
Required<{
_id: string;
}>,
never
>;
}) => {
// find data
const mongoData = await MongoDatasetData.findById(
trainingData.dataId,
'indexes teamId datasetId collectionId'
);
if (!mongoData) {
await trainingData.deleteOne();
return Promise.reject('No data');
}
const deleteVectorIdList = mongoData.indexes.map((index) => index.dataId);
const { tokens } = await mongoSessionRun(async (session) => {
// update vector, update dataset.data rebuilding status, delete data from training
const updateResult = await Promise.all(
mongoData.indexes.map(async (index, i) => {
const result = await insertDatasetDataVector({
query: index.text,
model: getVectorModel(trainingData.model),
teamId: mongoData.teamId,
datasetId: mongoData.datasetId,
collectionId: mongoData.collectionId
});
mongoData.indexes[i].dataId = result.insertId;
return result;
})
);
// Ensure that the training data is deleted after the Mongo update is successful
await mongoData.save({ session });
await trainingData.deleteOne({ session });
// delete old vector
await deleteDatasetDataVector({
teamId: mongoData.teamId,
idList: deleteVectorIdList
});
return {
tokens: updateResult.reduce((acc, cur) => acc + cur.tokens, 0)
};
});
// find next data insert to training queue
const arr = new Array(5).fill(0);
for await (const _ of arr) {
try {
const hasNextData = await mongoSessionRun(async (session) => {
// get new mongoData insert to training
const newRebuildingData = await MongoDatasetData.findOneAndUpdate(
{
teamId: mongoData.teamId,
datasetId: mongoData.datasetId,
rebuilding: true
},
{
$unset: {
rebuilding: null
},
updateTime: new Date()
},
{ session }
).select({
_id: 1,
collectionId: 1
});
if (newRebuildingData) {
await MongoDatasetTraining.create(
[
{
teamId: mongoData.teamId,
tmbId: trainingData.tmbId,
datasetId: mongoData.datasetId,
collectionId: newRebuildingData.collectionId,
billId: trainingData.billId,
mode: TrainingModeEnum.chunk,
model: trainingData.model,
q: '1',
dataId: newRebuildingData._id
}
],
{ session }
);
}
return !!newRebuildingData;
});
if (!hasNextData) {
break;
}
} catch (error) {}
}
return { tokens };
};
const insertData = async ({
trainingData
}: {
trainingData: Document<unknown, {}, DatasetTrainingSchemaType> &
Omit<
DatasetTrainingSchemaType &
Required<{
_id: string;
}>,
never
>;
}) => {
const { tokens } = await mongoSessionRun(async (session) => {
// insert new data to dataset
const { tokens } = await insertData2Dataset({
teamId: trainingData.teamId,
tmbId: trainingData.tmbId,
datasetId: trainingData.datasetId,
collectionId: trainingData.collectionId,
q: trainingData.q,
a: trainingData.a,
chunkIndex: trainingData.chunkIndex,
indexes: trainingData.indexes,
model: trainingData.model,
session
});
// delete data from training
await trainingData.deleteOne({ session });
return {
tokens
};
});
return { tokens };
};


@@ -1,10 +1,16 @@
import { jsonRes } from '@fastgpt/service/common/response';
import type { NextApiResponse, NextApiHandler, NextApiRequest } from 'next';
import type { NextApiResponse } from 'next';
import { connectToDatabase } from '../mongo';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { ApiRequestProps } from '@fastgpt/service/type/next';
export type NextApiHandler<T = any> = (
req: ApiRequestProps,
res: NextApiResponse<T>
) => unknown | Promise<unknown>;
export const NextAPI = (...args: NextApiHandler[]): NextApiHandler => {
return async function api(req: NextApiRequest, res: NextApiResponse) {
return async function api(req: ApiRequestProps, res: NextApiResponse) {
try {
await Promise.all([withNextCors(req, res), connectToDatabase()]);

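Because NextAPI now accepts a list of typed handlers, middleware can presumably be composed ahead of the final handler. A hedged sketch (the auth check is illustrative, not an API of this commit):
```typescript
import type { NextApiResponse } from 'next';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';

// Illustrative middleware: reject requests that carry no authorization header
async function requireAuth(req: ApiRequestProps, res: NextApiResponse) {
  if (!req.headers.authorization) {
    return Promise.reject('unauthorized');
  }
}

async function handler(req: ApiRequestProps, res: NextApiResponse) {
  return { ok: true };
}

// Handlers run left to right; a rejection is expected to short-circuit the chain
export default NextAPI(requireAuth, handler);
```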

@@ -19,8 +19,6 @@ import type {
GetTrainingQueueProps,
GetTrainingQueueResponse,
PostPreviewFilesChunksProps,
PostPreviewFilesChunksResponse,
PostPreviewTableChunksResponse,
SearchTestProps,
SearchTestResponse
} from '@/global/core/dataset/api.d';
@@ -29,7 +27,6 @@ import type {
CreateDatasetParams,
InsertOneDatasetDataProps
} from '@/global/core/dataset/api.d';
import type { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api.d';
import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import {
DatasetCollectionSyncResultEnum,
@@ -38,6 +35,8 @@ import {
import type { DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import { PagingData } from '@/types';
import type { getDatasetTrainingQueueResponse } from '@/pages/api/core/dataset/training/getDatasetTrainingQueue';
import type { rebuildEmbeddingBody } from '@/pages/api/core/dataset/training/rebuildEmbedding';
/* ======================== dataset ======================= */
export const getDatasets = (data: { parentId?: string; type?: `${DatasetTypeEnum}` }) =>
@@ -124,9 +123,17 @@ export const delOneDatasetDataById = (id: string) =>
DELETE<string>(`/core/dataset/data/delete`, { id });
/* ================ training ==================== */
export const postRebuildEmbedding = (data: rebuildEmbeddingBody) =>
POST(`/core/dataset/training/rebuildEmbedding`, data);
/* get length of system training queue */
export const getTrainingQueueLen = (data: GetTrainingQueueProps) =>
GET<GetTrainingQueueResponse>(`/core/dataset/training/getQueueLen`, data);
export const getDatasetTrainingQueue = (datasetId: string) =>
GET<getDatasetTrainingQueueResponse>(`/core/dataset/training/getDatasetTrainingQueue`, {
datasetId
});
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
POST<{ q: string; a: string }[]>('/core/dataset/file/getPreviewChunks', data);


@@ -0,0 +1,119 @@
import { useQuery } from '@tanstack/react-query';
import { ReactNode, useMemo } from 'react';
import { useTranslation } from 'next-i18next';
import { createContext } from 'use-context-selector';
import { getDatasetTrainingQueue, getTrainingQueueLen } from '../api';
import { useDatasetStore } from '../store/dataset';
type DatasetPageContextType = {
vectorTrainingMap: {
colorSchema: string;
tip: string;
};
agentTrainingMap: {
colorSchema: string;
tip: string;
};
rebuildingCount: number;
trainingCount: number;
refetchDatasetTraining: () => void;
};
type DatasetPageContextValueType = {
datasetId: string;
};
export const DatasetPageContext = createContext<DatasetPageContextType>({
vectorTrainingMap: {
colorSchema: '',
tip: ''
},
agentTrainingMap: {
colorSchema: '',
tip: ''
},
rebuildingCount: 0,
trainingCount: 0,
refetchDatasetTraining: function (): void {
throw new Error('Function not implemented.');
}
});
export const DatasetPageContextProvider = ({
children,
value
}: {
children: ReactNode;
value: DatasetPageContextValueType;
}) => {
const { t } = useTranslation();
const { datasetId } = value;
const { datasetDetail } = useDatasetStore();
// global queue
const { data: { vectorTrainingCount = 0, agentTrainingCount = 0 } = {} } = useQuery(
['getTrainingQueueLen'],
() =>
getTrainingQueueLen({
vectorModel: datasetDetail.vectorModel.model,
agentModel: datasetDetail.agentModel.model
}),
{
refetchInterval: 10000
}
);
const { vectorTrainingMap, agentTrainingMap } = useMemo(() => {
const vectorTrainingMap = (() => {
if (vectorTrainingCount < 1000)
return {
colorSchema: 'green',
tip: t('core.dataset.training.Leisure')
};
if (vectorTrainingCount < 10000)
return {
colorSchema: 'yellow',
tip: t('core.dataset.training.Waiting')
};
return {
colorSchema: 'red',
tip: t('core.dataset.training.Full')
};
})();
const agentTrainingMap = (() => {
if (agentTrainingCount < 100)
return {
colorSchema: 'green',
tip: t('core.dataset.training.Leisure')
};
if (agentTrainingCount < 1000)
return {
colorSchema: 'yellow',
tip: t('core.dataset.training.Waiting')
};
return {
colorSchema: 'red',
tip: t('core.dataset.training.Full')
};
})();
return {
vectorTrainingMap,
agentTrainingMap
};
}, [agentTrainingCount, t, vectorTrainingCount]);
// training and rebuild queue
const { data: { rebuildingCount = 0, trainingCount = 0 } = {}, refetch: refetchDatasetTraining } =
useQuery(['getDatasetTrainingQueue'], () => getDatasetTrainingQueue(datasetId), {
refetchInterval: 10000
});
const contextValue: DatasetPageContextType = {
vectorTrainingMap,
agentTrainingMap,
rebuildingCount,
trainingCount,
refetchDatasetTraining
};
return <DatasetPageContext.Provider value={contextValue}>{children}</DatasetPageContext.Provider>;
};


@@ -0,0 +1,18 @@
import { ReactNode } from 'react';
import { createContext } from 'use-context-selector';
type DatasetContextType = {};
type DatasetContextValueType = {};
export const DatasetContext = createContext<DatasetContextType>({});
export const DatasetContextProvider = ({
children,
value
}: {
children: ReactNode;
value: DatasetContextValueType;
}) => {
return <DatasetContext.Provider value={value}>{children}</DatasetContext.Provider>;
};