Dataset folder manager (#274)

* feat: retry send

* perf: qa default value

* feat: dataset folder

* feat: kb folder delete and path

* fix: ts

* perf: script load

* feat: fileCard and dataCard

* feat: search file

* feat: max token

* feat: select dataset

* fix: preview chunk

* perf: source update

* export data limit file_id

* docs

* fix: export limit
This commit is contained in:
Archer
2023-09-10 16:37:32 +08:00
committed by GitHub
parent a1a63260dd
commit 7917766024
83 changed files with 1996 additions and 702 deletions

View File

@@ -0,0 +1,32 @@
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { authUser } from '@/service/utils/auth';
import { connectToDatabase, KB } from '@/service/mongo';
import { KbTypeEnum, KbTypeMap } from '@/constants/kb';
/**
 * Admin-only migration endpoint: backfill the `type`/`parentId` fields on
 * legacy KB documents created before dataset folders existed.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    await connectToDatabase();
    // Root credential required — this touches every user's KBs.
    await authUser({ req, authRoot: true });

    // Any KB with no `type` yet becomes a plain, root-level dataset.
    const legacyFilter = { type: { $exists: false } };
    const backfill = {
      $set: {
        type: KbTypeEnum.dataset,
        parentId: null
      }
    };
    await KB.updateMany(legacyFilter, backfill);

    jsonRes(res, {});
  } catch (error) {
    jsonRes(res, { code: 500, error });
  }
}

View File

@@ -88,7 +88,7 @@ export async function pushDataToKb({
]);
const modeMaxToken = {
[TrainingModeEnum.index]: vectorModel.maxToken,
[TrainingModeEnum.index]: vectorModel.maxToken * 1.5,
[TrainingModeEnum.qa]: global.qaModel.maxToken * 0.8
};
@@ -146,7 +146,6 @@ export async function pushDataToKb({
}
} catch (error) {
console.log(error);
error;
}
return Promise.resolve(data);
})

View File

@@ -50,7 +50,6 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
await PgClient.update(PgTrainingTableName, {
where: [['id', dataId], 'AND', ['user_id', userId]],
values: [
{ key: 'source', value: '手动修改' },
{ key: 'a', value: a.replace(/'/g, '"') },
...(q
? [

View File

@@ -69,7 +69,7 @@ export async function getVector({
.then(async (res) => {
if (!res.data?.data?.[0]?.embedding) {
// @ts-ignore
return Promise.reject(res.data?.error?.message || 'Embedding API Error');
return Promise.reject(res.data?.err?.message || 'Embedding API Error');
}
return {
tokenLen: res.data.usage.total_tokens || 0,

View File

@@ -0,0 +1,34 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, KB } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { getVectorModel } from '@/service/utils/data';
import { KbListItemType } from '@/types/plugin';
/**
 * List every dataset-type KB owned by the authenticated user, with the
 * vector model name expanded into its full model descriptor.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    // Credential check
    const { userId } = await authUser({ req, authToken: true });
    await connectToDatabase();

    const datasets = await KB.find({
      userId,
      type: 'dataset'
    });

    jsonRes<KbListItemType[]>(res, {
      data: datasets.map((kb) => ({
        ...kb.toJSON(),
        vectorModel: getVectorModel(kb.vectorModel)
      }))
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

View File

@@ -6,11 +6,7 @@ import type { CreateKbParams } from '@/api/request/kb';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
const { name, tags, avatar, vectorModel } = req.body as CreateKbParams;
if (!name || !vectorModel) {
throw new Error('缺少参数');
}
const { name, tags, avatar, vectorModel, parentId, type } = req.body as CreateKbParams;
// 凭证校验
const { userId } = await authUser({ req, authToken: true });
@@ -22,7 +18,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
userId,
tags,
vectorModel,
avatar
avatar,
parentId: parentId || null,
type
});
jsonRes(res, { data: _id });

View File

@@ -4,11 +4,13 @@ import { connectToDatabase, User } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
import { PgTrainingTableName } from '@/constants/plugin';
import { OtherFileId } from '@/constants/kb';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
let { kbId } = req.query as {
let { kbId, fileId } = req.query as {
kbId: string;
fileId: string;
};
if (!kbId) {
@@ -20,7 +22,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// 凭证校验
const { userId } = await authUser({ req, authToken: true });
const thirtyMinutesAgo = new Date(Date.now() - 30 * 60 * 1000);
const thirtyMinutesAgo = new Date(
Date.now() - (global.feConfigs?.exportLimitMinutes || 0) * 60 * 1000
);
// auth export times
const authTimes = await User.findOne(
@@ -35,21 +39,19 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
);
if (!authTimes) {
throw new Error('上次导出未到半小时,每半小时仅可导出一次。');
const minutes = `${global.feConfigs?.exportLimitMinutes || 0} 分钟`;
throw new Error(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`);
}
// 统计数据
const count = await PgClient.count(PgTrainingTableName, {
where: [['kb_id', kbId], 'AND', ['user_id', userId]]
});
const where: any = [['kb_id', kbId], 'AND', ['user_id', userId]];
// 从 pg 中获取所有数据
const pgData = await PgClient.select<{ q: string; a: string; source: string }>(
PgTrainingTableName,
{
where: [['kb_id', kbId], 'AND', ['user_id', userId]],
where,
fields: ['q', 'a', 'source'],
order: [{ field: 'id', mode: 'DESC' }],
limit: count
limit: 1000000
}
);
@@ -78,7 +80,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
export const config = {
api: {
bodyParser: {
sizeLimit: '100mb'
sizeLimit: '200mb'
}
}
};

View File

@@ -5,6 +5,7 @@ import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
import type { KbDataItemType } from '@/types/plugin';
import { PgTrainingTableName } from '@/constants/plugin';
import { OtherFileId } from '@/constants/kb';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -12,12 +13,14 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
kbId,
pageNum = 1,
pageSize = 10,
searchText = ''
searchText = '',
fileId = ''
} = req.body as {
kbId: string;
pageNum: number;
pageSize: number;
searchText: string;
fileId: string;
};
if (!kbId) {
throw new Error('缺少参数');
@@ -33,6 +36,11 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
['user_id', userId],
'AND',
['kb_id', kbId],
...(fileId
? fileId === OtherFileId
? ["AND (file_id IS NULL OR file_id = '')"]
: ['AND', ['file_id', fileId]]
: []),
...(searchText
? [
'AND',

View File

@@ -3,12 +3,12 @@ import { jsonRes } from '@/service/response';
import { connectToDatabase, KB, App, TrainingData } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
import { Types } from 'mongoose';
import { PgTrainingTableName } from '@/constants/plugin';
import { GridFSStorage } from '@/service/lib/gridfs';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { id } = req.query as {
id: string;
};
@@ -20,26 +20,30 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// 凭证校验
const { userId } = await authUser({ req, authToken: true });
await connectToDatabase();
const deletedIds = [id, ...(await findAllChildrenIds(id))];
// delete training data
await TrainingData.deleteMany({
userId,
kbId: id
kbId: { $in: deletedIds }
});
// delete all pg data
await PgClient.delete(PgTrainingTableName, {
where: [['user_id', userId], 'AND', ['kb_id', id]]
where: [
['user_id', userId],
'AND',
`kb_id IN (${deletedIds.map((id) => `'${id}'`).join(',')})`
]
});
// delete related files
const gridFs = new GridFSStorage('dataset', userId);
await gridFs.deleteFilesByKbId(id);
await Promise.all(deletedIds.map((id) => gridFs.deleteFilesByKbId(id)));
// delete kb data
await KB.findOneAndDelete({
_id: id,
await KB.deleteMany({
_id: { $in: deletedIds },
userId
});
@@ -51,3 +55,17 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
});
}
}
/**
 * Recursively collect the ids of every KB nested (at any depth) under `id`.
 * Depth-first: all direct children come before any of their descendants.
 */
async function findAllChildrenIds(id: string) {
  const children = await KB.find({ parentId: id });
  const directIds = children.map((child) => String(child._id));

  // Append each child's own subtree after the direct-children ids.
  const descendantIds: string[] = [];
  for (const child of children) {
    descendantIds.push(...(await findAllChildrenIds(child._id)));
  }

  return directIds.concat(descendantIds);
}

View File

@@ -0,0 +1,55 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { PgClient } from '@/service/pg';
import { PgTrainingTableName } from '@/constants/plugin';
import { Types } from 'mongoose';
import { OtherFileId } from '@/constants/kb';
/**
 * Delete one file from a KB: its vector chunks in Pg and (for real files)
 * the GridFS blob. `OtherFileId` is a virtual bucket holding chunks that
 * were stored without a file reference.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    const { fileId, kbId } = req.query as { fileId: string; kbId: string };
    if (!fileId || !kbId) {
      throw new Error('fileId and kbId is required');
    }

    // Credential check
    const { userId } = await authUser({ req, authToken: true });

    // Conditions shared by both delete paths: scope to this user and KB.
    const scope = [['user_id', userId], 'AND', ['kb_id', kbId]];

    if (fileId === OtherFileId) {
      // Virtual bucket: drop chunks that carry no file_id at all.
      await PgClient.delete(PgTrainingTableName, {
        where: [...scope, "AND (file_id IS NULL OR file_id = '')"]
      });
    } else {
      const storage = new GridFSStorage('dataset', userId);
      const bucket = storage.GridFSBucket();

      // Throws if the file does not exist or belongs to another user.
      await storage.findAndAuthFile(fileId);

      // Remove the vector chunks first, then the stored file itself.
      await PgClient.delete(PgTrainingTableName, {
        where: [...scope, 'AND', ['file_id', fileId]]
      });
      await bucket.delete(new Types.ObjectId(fileId));
    }

    jsonRes(res);
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

View File

@@ -0,0 +1,59 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { PgClient } from '@/service/pg';
import { PgTrainingTableName } from '@/constants/plugin';
import { Types } from 'mongoose';
/**
 * Garbage-collect uploaded dataset files that produced no training chunks:
 * for each GridFS file in the KB older than the cutoff, count its Pg chunks
 * and delete the file when the count is zero.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    const { kbId } = req.query as { kbId: string };

    // Credential check
    const { userId } = await authUser({ req, authToken: true });

    const gridFs = new GridFSStorage('dataset', userId);
    const bucket = gridFs.GridFSBucket();

    const files = await bucket
      // Only files uploaded before the cutoff are considered, so in-flight
      // uploads are not deleted mid-processing.
      // NOTE(review): the cutoff is 60 * 1000 ms (1 minute) but the original
      // comment said "1 hours expired" — if an hour was intended this should
      // be 60 * 60 * 1000; confirm which is correct.
      .find({
        uploadDate: { $lte: new Date(Date.now() - 60 * 1000) },
        ['metadata.kbId']: kbId,
        ['metadata.userId']: userId
      })
      .sort({ _id: -1 })
      .toArray();

    // For every candidate file, count how many Pg chunks reference it.
    const data = await Promise.all(
      files.map(async (file) => {
        return {
          id: file._id,
          chunkLength: await PgClient.count(PgTrainingTableName, {
            fields: ['id'],
            where: [
              ['user_id', userId],
              'AND',
              ['kb_id', kbId],
              'AND',
              ['file_id', String(file._id)]
            ]
          })
        };
      })
    );

    // Delete only the files that produced zero chunks.
    await Promise.all(
      data
        .filter((item) => item.chunkLength === 0)
        .map((file) => bucket.delete(new Types.ObjectId(file.id)))
    );

    jsonRes(res);
  } catch (err) {
    // NOTE(review): errors are swallowed and the client still receives a
    // success-shaped response — presumably because this is best-effort
    // cleanup, but confirm that callers never need the failure signal.
    jsonRes(res);
  }
}

View File

@@ -0,0 +1,43 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { OtherFileId } from '@/constants/kb';
import type { FileInfo } from '@/types/plugin';
/**
 * Return metadata for a single dataset file. The special `OtherFileId`
 * pseudo-file (chunks without a file reference) gets synthesized metadata;
 * real files are looked up in GridFS with an ownership check.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    const { fileId } = req.query as { kbId: string; fileId: string };

    // Credential check
    const { userId } = await authUser({ req, authToken: true });

    if (fileId === OtherFileId) {
      // Virtual entry — it has no stored blob, so fabricate its info.
      const virtualFile = {
        id: OtherFileId,
        size: 0,
        filename: 'kb.Other Data',
        uploadDate: new Date(),
        encoding: '',
        contentType: ''
      };
      return jsonRes<FileInfo>(res, { data: virtualFile });
    }

    // Real file: findAndAuthFile throws when it is missing or not owned
    // by this user.
    const storage = new GridFSStorage('dataset', userId);
    jsonRes<FileInfo>(res, {
      data: await storage.findAndAuthFile(fileId)
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

View File

@@ -0,0 +1,84 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, TrainingData } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { PgClient } from '@/service/pg';
import { PgTrainingTableName } from '@/constants/plugin';
import { KbFileItemType } from '@/types/plugin';
import { FileStatusEnum, OtherFileId } from '@/constants/kb';
/**
 * List the files of a KB (optionally filtered by filename search) together
 * with a synthetic "other data" entry for chunks stored without a file_id.
 * Entries with zero chunks are hidden from the result.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    let { kbId, searchText } = req.query as { kbId: string; searchText: string };
    // Fix: query params are optional at runtime despite the cast — calling
    // .replace on an absent searchText threw a 500. Default to '' first;
    // single quotes are stripped as in the original sanitization.
    searchText = (searchText || '').replace(/'/g, '');

    // Credential check
    const { userId } = await authUser({ req, authToken: true });

    const gridFs = new GridFSStorage('dataset', userId);
    const bucket = gridFs.GridFSBucket();

    // NOTE(review): searchText reaches $regex unescaped, so regex
    // metacharacters act as patterns — confirm this is acceptable.
    const files = await bucket
      .find({ ['metadata.kbId']: kbId, ...(searchText && { filename: { $regex: searchText } }) })
      .sort({ _id: -1 })
      .toArray();

    // Synthetic entry for chunks that carry no file reference.
    async function GetOtherData() {
      return {
        id: OtherFileId,
        size: 0,
        filename: 'kb.Other Data',
        uploadTime: new Date(),
        // "embedding" while any training rows for the bucket are pending.
        status: (await TrainingData.findOne({ userId, kbId, file_id: '' }))
          ? FileStatusEnum.embedding
          : FileStatusEnum.ready,
        chunkLength: await PgClient.count(PgTrainingTableName, {
          fields: ['id'],
          where: [
            ['user_id', userId],
            'AND',
            ['kb_id', kbId],
            "AND (file_id IS NULL OR file_id = '')"
          ]
        })
      };
    }

    const data = await Promise.all([
      GetOtherData(),
      ...files.map(async (file) => {
        return {
          id: String(file._id),
          size: file.length,
          filename: file.filename,
          uploadTime: file.uploadDate,
          status: (await TrainingData.findOne({ userId, kbId, file_id: file._id }))
            ? FileStatusEnum.embedding
            : FileStatusEnum.ready,
          chunkLength: await PgClient.count(PgTrainingTableName, {
            fields: ['id'],
            where: [
              ['user_id', userId],
              'AND',
              ['kb_id', kbId],
              'AND',
              ['file_id', String(file._id)]
            ]
          })
        };
      })
    ]);

    jsonRes<KbFileItemType[]>(res, {
      // Hide empty entries, including an unused "other data" bucket.
      data: data.flat().filter((item) => item.chunkLength > 0)
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

View File

@@ -2,29 +2,25 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, KB } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { KbListItemType } from '@/types/plugin';
import { getVectorModel } from '@/service/utils/data';
import { KbListItemType } from '@/types/plugin';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
const { parentId } = req.query as { parentId: string };
// 凭证校验
const { userId } = await authUser({ req, authToken: true });
await connectToDatabase();
const kbList = await KB.find(
{
userId
},
'_id avatar name tags vectorModel'
).sort({ updateTime: -1 });
const kbList = await KB.find({
userId,
parentId: parentId || null
}).sort({ updateTime: -1 });
const data = await Promise.all(
kbList.map(async (item) => ({
_id: item._id,
avatar: item.avatar,
name: item.name,
tags: item.tags,
...item.toJSON(),
vectorModel: getVectorModel(item.vectorModel)
}))
);

View File

@@ -0,0 +1,36 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, KB } from '@/service/mongo';
import { KbPathItemType } from '@/types/plugin';
/**
 * Resolve the folder breadcrumb (root → parent) for a KB folder id.
 *
 * NOTE(review): unlike the sibling KB endpoints, this handler performs no
 * authUser() check and getParents() does not filter by userId, so any
 * request can enumerate folder names by guessing ids — confirm whether an
 * auth/ownership check should be added here.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    const { parentId } = req.query as { parentId: string };

    jsonRes<KbPathItemType[]>(res, {
      data: await getParents(parentId)
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
/**
 * Walk the parent chain upward from `parentId` and return the path in
 * root-first order. Stops at a missing/undefined parent.
 *
 * Fix: guards against cyclic parentId data (e.g. a folder accidentally set
 * as its own ancestor), which previously recursed forever. The `visited`
 * parameter defaults to a fresh set, so existing callers are unaffected.
 */
async function getParents(
  parentId?: string,
  visited: Set<string> = new Set()
): Promise<KbPathItemType[]> {
  // Reached the root, or a cycle — stop walking.
  if (!parentId || visited.has(parentId)) {
    return [];
  }
  visited.add(parentId);

  const parent = await KB.findById(parentId, 'name parentId');
  if (!parent) return [];

  // Ancestors first, then this node, so the result reads root → leaf.
  const paths = await getParents(parent.parentId, visited);
  paths.push({ parentId, parentName: parent.name });
  return paths;
}

View File

@@ -6,7 +6,7 @@ import type { KbUpdateParams } from '@/api/request/kb';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
const { id, name, tags, avatar } = req.body as KbUpdateParams;
const { id, name, avatar, tags = '' } = req.body as KbUpdateParams;
if (!id || !name) {
throw new Error('缺少参数');
@@ -23,8 +23,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
userId
},
{
avatar,
name,
...(name && { name }),
...(avatar && { avatar }),
tags: tags.split(' ').filter((item) => item)
}
);

View File

@@ -1,4 +1,4 @@
import type { FeConfigsType } from '@/types';
import type { FeConfigsType, SystemEnvType } from '@/types';
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { readFileSync } from 'fs';
@@ -29,12 +29,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
});
}
const defaultSystemEnv = {
const defaultSystemEnv: SystemEnvType = {
vectorMaxProcess: 15,
qaMaxProcess: 15,
pgIvfflatProbe: 20
};
const defaultFeConfigs = {
const defaultFeConfigs: FeConfigsType = {
show_emptyChat: true,
show_register: false,
show_appStore: false,
@@ -44,7 +44,7 @@ const defaultFeConfigs = {
show_doc: true,
systemTitle: 'FastGPT',
authorText: 'Made by FastGPT Team.',
gitLoginKey: '',
exportLimitMinutes: 0,
scripts: []
};
const defaultChatModels = [
@@ -99,8 +99,10 @@ export async function getInitConfig() {
const res = JSON.parse(readFileSync(filename, 'utf-8'));
console.log(res);
global.systemEnv = res.SystemParams || defaultSystemEnv;
global.feConfigs = res.FeConfig || defaultFeConfigs;
global.systemEnv = res.SystemParams
? { ...defaultSystemEnv, ...res.SystemParams }
: defaultSystemEnv;
global.feConfigs = res.FeConfig ? { ...defaultFeConfigs, ...res.FeConfig } : defaultFeConfigs;
global.chatModels = res.ChatModels || defaultChatModels;
global.qaModel = res.QAModel || defaultQAModel;
global.vectorModels = res.VectorModels || defaultVectorModels;