* move file

* perf: dataset file management

* v441 description

* fix: qa csv update file

* feat: rename file

* frontend: show system version
Archer committed 2023-09-13 17:00:17 +08:00 (committed by GitHub)
parent be3b680bc6
commit a19afca148
53 changed files with 570 additions and 301 deletions

View File

@@ -3,7 +3,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
-import { PgTrainingTableName } from '@/constants/plugin';
+import { PgDatasetTableName } from '@/constants/plugin';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
@@ -12,7 +12,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const { rowCount } = await PgClient.query(`SELECT 1
FROM information_schema.columns
WHERE table_schema = 'public'
-AND table_name = '${PgTrainingTableName}'
+AND table_name = '${PgDatasetTableName}'
AND column_name = 'file_id'`);
if (rowCount > 0) {
@@ -23,7 +23,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
jsonRes(res, {
data: await PgClient.query(
-`ALTER TABLE ${PgTrainingTableName} ADD COLUMN file_id VARCHAR(100)`
+`ALTER TABLE ${PgDatasetTableName} ADD COLUMN file_id VARCHAR(100)`
)
});
} catch (error) {

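For reference, the guard above is the standard idempotent-migration pattern: probe information_schema for the column and only ALTER when it is missing, so the route can be hit repeatedly without erroring. A minimal standalone sketch with the node-postgres driver (the connection string env var and table name are assumptions, not the project's actual config):

import { Pool } from 'pg';

const pool = new Pool({ connectionString: process.env.PG_URL }); // assumed env var

// Add file_id only if it does not exist yet, so re-running is a no-op.
async function ensureFileIdColumn(table: string) {
  const { rowCount } = await pool.query(
    `SELECT 1 FROM information_schema.columns
     WHERE table_schema = 'public' AND table_name = $1 AND column_name = 'file_id'`,
    [table]
  );
  if (!rowCount) {
    await pool.query(`ALTER TABLE ${table} ADD COLUMN file_id VARCHAR(100)`);
  }
}

On Postgres 9.6+, `ALTER TABLE ... ADD COLUMN IF NOT EXISTS file_id VARCHAR(100)` collapses the probe and the alter into a single statement.
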
View File

@@ -5,7 +5,7 @@ import { authUser } from '@/service/utils/auth';
import { connectToDatabase, KB } from '@/service/mongo';
import { KbTypeEnum } from '@/constants/kb';
import { PgClient } from '@/service/pg';
-import { PgTrainingTableName } from '@/constants/plugin';
+import { PgDatasetTableName } from '@/constants/plugin';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
@@ -24,7 +24,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
}
);
-const response = await PgClient.update(PgTrainingTableName, {
+const response = await PgClient.update(PgDatasetTableName, {
where: [['file_id', 'undefined']],
values: [{ key: 'file_id', value: '' }]
});

View File

@@ -0,0 +1,35 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { authUser } from '@/service/utils/auth';
import { connectToDatabase } from '@/service/mongo';
import mongoose from 'mongoose';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
await connectToDatabase();
await authUser({ req, authRoot: true });
const data = await mongoose.connection.db
.collection('dataset.files')
.updateMany({}, { $set: { 'metadata.datasetUsed': true } });
// update pg data
const pg = await PgClient.query(`UPDATE ${PgDatasetTableName}
SET file_id = ''
WHERE (file_id = 'undefined' OR LENGTH(file_id) < 20) AND file_id != '';`);
jsonRes(res, {
data: {
data,
pg
}
});
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}

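The `LENGTH(file_id) < 20` predicate works because a real GridFS id is a 24-character ObjectId hex string, so anything shorter (including the literal string 'undefined') is residue from earlier versions. The same rule in TypeScript, as a sketch using only mongoose's ObjectId helper:

import { Types } from 'mongoose';

// Keep plausible GridFS ids, blank out residue values.
function normalizeFileId(fileId?: string): string {
  if (!fileId || fileId === 'undefined' || fileId.length < 20) return '';
  return Types.ObjectId.isValid(fileId) ? fileId : '';
}

normalizeFileId('undefined'); // => ''
normalizeFileId('64f1c0ffee64f1c0ffee64f1'); // => kept, 24 hex chars
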
View File

@@ -4,7 +4,7 @@ import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { PgClient } from '@/service/pg';
-import { PgTrainingTableName } from '@/constants/plugin';
+import { PgDatasetTableName } from '@/constants/plugin';
import { Types } from 'mongoose';
import { OtherFileId } from '@/constants/kb';
@@ -22,7 +22,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const { userId } = await authUser({ req, authToken: true });
if (fileId === OtherFileId) {
-await PgClient.delete(PgTrainingTableName, {
+await PgClient.delete(PgDatasetTableName, {
where: [
['user_id', userId],
'AND',
@@ -37,7 +37,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
await gridFs.findAndAuthFile(fileId);
// delete all pg data
-await PgClient.delete(PgTrainingTableName, {
+await PgClient.delete(PgDatasetTableName, {
where: [['user_id', userId], 'AND', ['kb_id', kbId], 'AND', ['file_id', fileId]]
});

View File

@@ -0,0 +1,31 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { kbId } = req.query as { kbId: string };
// verify credentials
const { userId } = await authUser({ req, authToken: true });
const gridFs = new GridFSStorage('dataset', userId);
const collection = gridFs.Collection();
const files = await collection.deleteMany({
uploadDate: { $lte: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000) },
['metadata.kbId']: kbId,
['metadata.userId']: userId,
['metadata.datasetUsed']: { $ne: true }
});
jsonRes(res, {
data: files
});
} catch (err) {
jsonRes(res);
}
}

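One subtlety worth noting: `gridFs.Collection()` appears to expose the bucket's `.files` metadata collection, and a `deleteMany` there does not touch the paired `.chunks` collection. Where orphaned chunks matter, the driver's `GridFSBucket.delete()` removes both; a sketch under that assumption:

import mongoose from 'mongoose';
import { GridFSBucket } from 'mongodb';

// Delete expired, never-used uploads file by file so their chunks go too.
async function purgeExpired(kbId: string, userId: string) {
  const db = mongoose.connection.db; // assumes an open connection
  if (!db) throw new Error('mongoose is not connected');
  const bucket = new GridFSBucket(db, { bucketName: 'dataset' });
  const cutoff = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);
  const expired = await db
    .collection('dataset.files')
    .find({
      uploadDate: { $lte: cutoff },
      'metadata.kbId': kbId,
      'metadata.userId': userId,
      'metadata.datasetUsed': { $ne: true }
    })
    .project({ _id: 1 })
    .toArray();
  await Promise.all(expired.map((f) => bucket.delete(f._id)));
}
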
View File

@@ -4,9 +4,8 @@ import { connectToDatabase, TrainingData } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { PgClient } from '@/service/pg';
-import { PgTrainingTableName } from '@/constants/plugin';
+import { PgDatasetTableName } from '@/constants/plugin';
import { FileStatusEnum, OtherFileId } from '@/constants/kb';
-import mongoose from 'mongoose';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -16,28 +15,37 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
pageNum = 1,
pageSize = 10,
kbId,
-searchText
+searchText = ''
} = req.body as { pageNum: number; pageSize: number; kbId: string; searchText: string };
-searchText = searchText.replace(/'/g, '');
+searchText = searchText?.replace(/'/g, '');
// verify credentials
const { userId } = await authUser({ req, authToken: true });
// find files
const gridFs = new GridFSStorage('dataset', userId);
-const bucket = gridFs.GridFSBucket();
+const collection = gridFs.Collection();
const mongoWhere = {
['metadata.kbId']: kbId,
['metadata.userId']: userId,
+['metadata.datasetUsed']: true,
...(searchText && { filename: { $regex: searchText } })
};
const [files, total] = await Promise.all([
-bucket
-.find(mongoWhere)
-.sort({ _id: -1 })
+collection
+.find(mongoWhere, {
+projection: {
+_id: 1,
+filename: 1,
+uploadDate: 1,
+length: 1
+}
+})
.skip((pageNum - 1) * pageSize)
.limit(pageSize)
.toArray(),
-mongoose.connection.db.collection('dataset.files').countDocuments(mongoWhere)
+collection.countDocuments(mongoWhere)
]);
async function GetOtherData() {
@@ -49,7 +57,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
status: (await TrainingData.findOne({ userId, kbId, file_id: '' }))
? FileStatusEnum.embedding
: FileStatusEnum.ready,
-chunkLength: await PgClient.count(PgTrainingTableName, {
+chunkLength: await PgClient.count(PgDatasetTableName, {
fields: ['id'],
where: [
['user_id', userId],
@@ -72,7 +80,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
status: (await TrainingData.findOne({ userId, kbId, file_id: file._id }))
? FileStatusEnum.embedding
: FileStatusEnum.ready,
-chunkLength: await PgClient.count(PgTrainingTableName, {
+chunkLength: await PgClient.count(PgDatasetTableName, {
fields: ['id'],
where: [
['user_id', userId],
@@ -90,7 +98,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
data: {
pageNum,
pageSize,
-data: data.flat().filter((item) => item.chunkLength > 0),
+data: data.flat(),
total
}
});

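The listing pairs a projected, paginated `find` with a `countDocuments` over the same filter, run in parallel since neither depends on the other. The pattern in isolation, as a sketch:

import { Collection, Document } from 'mongodb';

// Fetch one page plus the total count for the same filter.
async function pageFind(col: Collection<Document>, filter: Document, pageNum: number, pageSize: number) {
  const [rows, total] = await Promise.all([
    col
      .find(filter, { projection: { _id: 1, filename: 1, uploadDate: 1, length: 1 } })
      .skip((pageNum - 1) * pageSize) // pages are 1-indexed
      .limit(pageSize)
      .toArray(),
    col.countDocuments(filter)
  ]);
  return { rows, total };
}
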
View File

@@ -0,0 +1,66 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { UpdateFileProps } from '@/api/core/dataset/file.d';
import { Types } from 'mongoose';
import { PgClient } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { id, name, datasetUsed } = req.body as UpdateFileProps;
const { userId } = await authUser({ req, authToken: true });
const gridFs = new GridFSStorage('dataset', userId);
const collection = gridFs.Collection();
await collection.findOneAndUpdate(
{
_id: new Types.ObjectId(id)
},
{
$set: {
...(name && { filename: name }),
...(datasetUsed && { ['metadata.datasetUsed']: datasetUsed })
}
}
);
// data source
updateDatasetSource({
fileId: id,
userId,
name
});
jsonRes(res, {});
} catch (err) {
jsonRes(res, {
code: 500,
error: err
});
}
}
async function updateDatasetSource(data: { fileId: string; userId: string; name?: string }) {
const { fileId, userId, name } = data;
if (!fileId || !name || !userId) return;
try {
await PgClient.update(PgDatasetTableName, {
where: [['user_id', userId], 'AND', ['file_id', fileId]],
values: [
{
key: 'source',
value: name
}
]
});
} catch (error) {
setTimeout(() => {
updateDatasetSource(data);
}, 2000);
}
}

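`updateDatasetSource` retries itself every 2 s on failure with no attempt cap, and the handler does not await it, so the response returns while the PG `source` field converges in the background. A generic bounded variant of that retry shape, as a sketch:

// Retry an async task a fixed number of times with a constant delay.
async function withRetry<T>(task: () => Promise<T>, attempts = 5, delayMs = 2000): Promise<T> {
  for (let i = 1; ; i++) {
    try {
      return await task();
    } catch (err) {
      if (i >= attempts) throw err; // give up after the last attempt
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}
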
View File

@@ -3,7 +3,7 @@ import { jsonRes } from '@/service/response';
import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
import { withNextCors } from '@/service/utils/tools';
-import { PgTrainingTableName } from '@/constants/plugin';
+import { PgDatasetTableName } from '@/constants/plugin';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -18,7 +18,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
// verify credentials
const { userId } = await authUser({ req });
-await PgClient.delete(PgTrainingTableName, {
+await PgClient.delete(PgDatasetTableName, {
where: [['user_id', userId], 'AND', ['id', dataId]]
});

View File

@@ -4,7 +4,7 @@ import { connectToDatabase, TrainingData, KB } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { authKb } from '@/service/utils/auth';
import { withNextCors } from '@/service/utils/tools';
-import { PgTrainingTableName, TrainingModeEnum } from '@/constants/plugin';
+import { PgDatasetTableName, TrainingModeEnum } from '@/constants/plugin';
import { startQueue } from '@/service/utils/tools';
import { PgClient } from '@/service/pg';
import { modelToolMap } from '@/utils/plugin';
@@ -136,7 +136,7 @@ export async function pushDataToKb({
try {
const { rows } = await PgClient.query(`
SELECT COUNT(*) > 0 AS exists
-FROM ${PgTrainingTableName}
+FROM ${PgDatasetTableName}
WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND user_id='${userId}' AND kb_id='${kbId}'
`);
const exists = rows[0]?.exists || false;

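The duplicate probe hashes both sides with `md5()` instead of comparing the raw text columns, which keeps the comparison cheap for long strings (and can use an expression index if one exists). A parameterized sketch of the same check, with `modeldata` as an assumed stand-in for the table name:

import { Pool } from 'pg';

const pool = new Pool({ connectionString: process.env.PG_URL }); // assumed

// True when an identical (q, a) pair already exists for this user/kb.
async function qaExists(q: string, a: string, userId: string, kbId: string) {
  const { rows } = await pool.query(
    `SELECT COUNT(*) > 0 AS exists
     FROM modeldata
     WHERE md5(q) = md5($1) AND md5(a) = md5($2)
       AND user_id = $3 AND kb_id = $4`,
    [q, a, userId, kbId]
  );
  return rows[0]?.exists === true;
}
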
View File

@@ -5,7 +5,7 @@ import { PgClient } from '@/service/pg';
import { withNextCors } from '@/service/utils/tools';
import { getVector } from '../plugin/vector';
import type { KbTestItemType } from '@/types/plugin';
-import { PgTrainingTableName } from '@/constants/plugin';
+import { PgDatasetTableName } from '@/constants/plugin';
import { KB } from '@/service/mongo';
export type Props = {
@@ -43,7 +43,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
SET LOCAL ivfflat.probes = ${global.systemEnv.pgIvfflatProbe || 10};
select id, q, a, source, file_id, (vector <#> '[${
vectors[0]
-}]') * -1 AS score from ${PgTrainingTableName} where kb_id='${kbId}' AND user_id='${userId}' order by vector <#> '[${
+}]') * -1 AS score from ${PgDatasetTableName} where kb_id='${kbId}' AND user_id='${userId}' order by vector <#> '[${
vectors[0]
}]' limit 12;
COMMIT;`

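In pgvector, `<#>` evaluates to the negative inner product (negated so that ascending index order puts the best matches first), which is why the score is multiplied by -1 to recover a positive similarity; `SET LOCAL ivfflat.probes` raises recall on an IVFFlat index at the cost of scanning more lists and only lasts until COMMIT. The same query shape with bind parameters, table name assumed:

import { Pool } from 'pg';

const pool = new Pool({ connectionString: process.env.PG_URL }); // assumed

// Top-k nearest rows by inner product for one user's kb.
async function searchTopK(kbId: string, userId: string, vector: number[], k = 12) {
  const client = await pool.connect();
  try {
    await client.query('BEGIN');
    await client.query('SET LOCAL ivfflat.probes = 10');
    const { rows } = await client.query(
      `SELECT id, q, a, source, file_id, (vector <#> $1) * -1 AS score
       FROM modeldata
       WHERE kb_id = $2 AND user_id = $3
       ORDER BY vector <#> $1
       LIMIT $4`,
      [`[${vector.join(',')}]`, kbId, userId, k]
    );
    await client.query('COMMIT');
    return rows;
  } catch (err) {
    await client.query('ROLLBACK');
    throw err;
  } finally {
    client.release();
  }
}
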
View File

@@ -5,7 +5,7 @@ import { PgClient } from '@/service/pg';
import { withNextCors } from '@/service/utils/tools';
import { KB, connectToDatabase } from '@/service/mongo';
import { getVector } from '../plugin/vector';
-import { PgTrainingTableName } from '@/constants/plugin';
+import { PgDatasetTableName } from '@/constants/plugin';
export type Props = {
dataId: string;
@@ -47,7 +47,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
})();
// update the pg record: only `a` changed, so the vector does not need re-embedding
-await PgClient.update(PgTrainingTableName, {
+await PgClient.update(PgDatasetTableName, {
where: [['id', dataId], 'AND', ['user_id', userId]],
values: [
{ key: 'a', value: a.replace(/'/g, '"') },

View File

@@ -3,14 +3,13 @@ import { jsonRes } from '@/service/response';
import { connectToDatabase, User } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
-import { PgTrainingTableName } from '@/constants/plugin';
-import { OtherFileId } from '@/constants/kb';
+import { PgDatasetTableName } from '@/constants/plugin';
+import { findAllChildrenIds } from '../delete';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
-let { kbId, fileId } = req.query as {
+let { kbId } = req.query as {
kbId: string;
-fileId: string;
};
if (!kbId) {
@@ -22,6 +21,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// verify credentials
const { userId } = await authUser({ req, authToken: true });
+const exportIds = [kbId, ...(await findAllChildrenIds(kbId))];
+console.log(exportIds);
const thirtyMinutesAgo = new Date(
Date.now() - (global.feConfigs?.limit?.exportLimitMinutes || 0) * 60 * 1000
);
@@ -43,10 +45,14 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
throw new Error(`Less than ${minutes} since the last export; only one export is allowed every ${minutes}.`);
}
-const where: any = [['kb_id', kbId], 'AND', ['user_id', userId]];
+const where: any = [
+['user_id', userId],
+'AND',
+`kb_id IN (${exportIds.map((id) => `'${id}'`).join(',')})`
+];
// fetch all matching rows from pg
const pgData = await PgClient.select<{ q: string; a: string; source: string }>(
-PgTrainingTableName,
+PgDatasetTableName,
{
where,
fields: ['q', 'a', 'source'],

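The export now covers the whole folder subtree: `where` switches from a single `kb_id` equality to an IN list over `exportIds`. Since the ids are interpolated with hand-placed quotes, a bind-parameter variant is sketched here for comparison (the helper name is hypothetical):

// Build "user_id = $1 AND kb_id IN ($2,$3,...)" plus its value array.
function buildExportWhere(userId: string, exportIds: string[]) {
  const placeholders = exportIds.map((_, i) => `$${i + 2}`).join(',');
  return {
    text: `user_id = $1 AND kb_id IN (${placeholders})`,
    values: [userId, ...exportIds]
  };
}
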
View File

@@ -4,7 +4,7 @@ import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
import type { KbDataItemType } from '@/types/plugin';
-import { PgTrainingTableName } from '@/constants/plugin';
+import { PgDatasetTableName } from '@/constants/plugin';
export type Response = {
id: string;
@@ -29,7 +29,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const where: any = [['user_id', userId], 'AND', ['id', dataId]];
-const searchRes = await PgClient.select<KbDataItemType>(PgTrainingTableName, {
+const searchRes = await PgClient.select<KbDataItemType>(PgDatasetTableName, {
fields: ['kb_id', 'id', 'q', 'a', 'source', 'file_id'],
where,
limit: 1

View File

@@ -4,7 +4,7 @@ import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
import type { KbDataItemType } from '@/types/plugin';
-import { PgTrainingTableName } from '@/constants/plugin';
+import { PgDatasetTableName } from '@/constants/plugin';
import { OtherFileId } from '@/constants/kb';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -50,14 +50,14 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
];
const [searchRes, total] = await Promise.all([
-PgClient.select<KbDataItemType>(PgTrainingTableName, {
+PgClient.select<KbDataItemType>(PgDatasetTableName, {
fields: ['id', 'q', 'a', 'source', 'file_id'],
where,
order: [{ field: 'id', mode: 'DESC' }],
limit: pageSize,
offset: pageSize * (pageNum - 1)
}),
-PgClient.count(PgTrainingTableName, {
+PgClient.count(PgDatasetTableName, {
fields: ['id'],
where
})

View File

@@ -3,7 +3,7 @@ import { jsonRes } from '@/service/response';
import { connectToDatabase, KB } from '@/service/mongo';
import { authKb, authUser } from '@/service/utils/auth';
import { withNextCors } from '@/service/utils/tools';
-import { PgTrainingTableName } from '@/constants/plugin';
+import { PgDatasetTableName } from '@/constants/plugin';
import { insertKbItem, PgClient } from '@/service/pg';
import { modelToolMap } from '@/utils/plugin';
import { getVectorModel } from '@/service/utils/data';
@@ -45,7 +45,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
const { rows: existsRows } = await PgClient.query(`
SELECT COUNT(*) > 0 AS exists
-FROM ${PgTrainingTableName}
+FROM ${PgDatasetTableName}
WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND user_id='${userId}' AND kb_id='${kbId}'
`);
const exists = existsRows[0]?.exists || false;

View File

@@ -3,7 +3,7 @@ import { jsonRes } from '@/service/response';
import { connectToDatabase, KB, App, TrainingData } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
-import { PgTrainingTableName } from '@/constants/plugin';
+import { PgDatasetTableName } from '@/constants/plugin';
import { GridFSStorage } from '@/service/lib/gridfs';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -29,7 +29,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
});
// delete all pg data
-await PgClient.delete(PgTrainingTableName, {
+await PgClient.delete(PgDatasetTableName, {
where: [
['user_id', userId],
'AND',
@@ -56,7 +56,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
}
}
-async function findAllChildrenIds(id: string) {
+export async function findAllChildrenIds(id: string) {
// find children
const children = await KB.find({ parentId: id });

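`findAllChildrenIds` recurses one level of `parentId` links at a time, flattening a folder's entire subtree into an id array. The shape of that recursion in isolation (the `find` callback stands in for the KB model query):

interface Node { _id: string; parentId?: string }

// Collect the ids of every descendant of `id`, depth first.
async function findDescendantIds(find: (parentId: string) => Promise<Node[]>, id: string): Promise<string[]> {
  const children = await find(id);
  const nested = await Promise.all(children.map((c) => findDescendantIds(find, c._id)));
  return [...children.map((c) => c._id), ...nested.flat()];
}
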
View File

@@ -1,59 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { PgClient } from '@/service/pg';
import { PgTrainingTableName } from '@/constants/plugin';
import { Types } from 'mongoose';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { kbId } = req.query as { kbId: string };
// verify credentials
const { userId } = await authUser({ req, authToken: true });
const gridFs = new GridFSStorage('dataset', userId);
const bucket = gridFs.GridFSBucket();
const files = await bucket
// expired: uploaded more than 7 days ago
.find({
uploadDate: { $lte: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000) },
['metadata.kbId']: kbId,
['metadata.userId']: userId
})
.sort({ _id: -1 })
.toArray();
const data = await Promise.all(
files.map(async (file) => {
return {
id: file._id,
chunkLength: await PgClient.count(PgTrainingTableName, {
fields: ['id'],
where: [
['user_id', userId],
'AND',
['kb_id', kbId],
'AND',
['file_id', String(file._id)]
]
})
};
})
);
await Promise.all(
data
.filter((item) => item.chunkLength === 0)
.map((file) => bucket.delete(new Types.ObjectId(file.id)))
);
jsonRes(res);
} catch (err) {
jsonRes(res);
}
}

View File

@@ -23,7 +23,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
});
jsonRes(res, {
-data: `/api/plugins/file/read?token=${token}`
+data: `/api/support/file/read?token=${token}`
});
} catch (error) {
jsonRes(res, {

View File

@@ -13,6 +13,7 @@ export type InitDateResponse = {
qaModel: QAModelItemType;
vectorModels: VectorModelItemType[];
feConfigs: FeConfigsType;
+systemVersion: string;
};
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
@@ -24,7 +25,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
feConfigs: global.feConfigs,
chatModels: global.chatModels,
qaModel: global.qaModel,
-vectorModels: global.vectorModels
+vectorModels: global.vectorModels,
+systemVersion: process.env.npm_package_version || '0.0.0'
}
});
}
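
`process.env.npm_package_version` is injected by npm/yarn from package.json, but only when the process is started through a package script (e.g. `npm run start`); started any other way it is undefined, hence the '0.0.0' fallback. A slightly more defensive sketch that falls back to reading package.json directly (the relative path is an assumption):

import { readFileSync } from 'fs';

// Resolve the app version: env var first, then package.json, then 0.0.0.
function getSystemVersion(): string {
  if (process.env.npm_package_version) return process.env.npm_package_version;
  try {
    const pkg = JSON.parse(readFileSync('package.json', 'utf8')); // assumes cwd = project root
    return pkg.version ?? '0.0.0';
  } catch {
    return '0.0.0';
  }
}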