This commit is contained in:
Archer
2023-11-15 11:36:25 +08:00
committed by GitHub
parent 592e1a93a2
commit bfd8be5df0
181 changed files with 2499 additions and 1552 deletions

View File

@@ -0,0 +1,169 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { delay } from '@/utils/tools';
import { PgClient } from '@fastgpt/service/common/pg';
import {
DatasetDataIndexTypeEnum,
PgDatasetTableName
} from '@fastgpt/global/core/dataset/constant';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { getUserDefaultTeam } from '@fastgpt/service/support/user/team/controller';
let success = 0;
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
const { limit = 50 } = req.body as { limit: number };
await authCert({ req, authRoot: true });
await connectToDatabase();
success = 0;
try {
await Promise.allSettled([
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ADD COLUMN data_id VARCHAR(50);`),
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN q DROP NOT NULL;`), // q can null
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN a DROP NOT NULL;`), // a can null
PgClient.query(
`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN team_id TYPE VARCHAR(50) USING team_id::VARCHAR(50);`
), // team_id varchar
PgClient.query(
`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN tmb_id TYPE VARCHAR(50) USING tmb_id::VARCHAR(50);`
), // tmb_id varchar
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN team_id SET NOT NULL;`), // team_id not null
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN tmb_id SET NOT NULL;`), // tmb_id not null
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN dataset_id SET NOT NULL;`), // dataset_id not null
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN collection_id SET NOT NULL;`) // collection_id not null
]);
} catch (error) {}
await initPgData();
jsonRes(res, {
data: await init(limit),
message:
'初始化任务已开始,请注意日志进度。可通过 select count(id) from modeldata where data_id is null; 检查是否完全初始化,如果结果为 0 ,则完全初始化。'
});
} catch (error) {
console.log(error);
jsonRes(res, {
code: 500,
error
});
}
}
type PgItemType = {
id: string;
q: string;
a: string;
dataset_id: string;
collection_id: string;
team_id: string;
tmb_id: string;
};
async function initPgData() {
const limit = 10;
const { rows } = await PgClient.query<{ user_id: string }>(`
SELECT DISTINCT user_id FROM ${PgDatasetTableName} WHERE team_id='null';
`);
console.log('init pg', rows.length);
let success = 0;
for (let i = 0; i < limit; i++) {
init(i);
}
async function init(index: number): Promise<any> {
const userId = rows[index]?.user_id;
if (!userId) return;
try {
const tmb = await getUserDefaultTeam({ userId });
// update pg
await PgClient.query(
`Update ${PgDatasetTableName} set team_id = '${tmb.teamId}', tmb_id = '${tmb.tmbId}' where user_id = '${userId}' AND team_id='null';`
);
console.log(++success);
init(index + limit);
} catch (error) {
if (error === 'default team not exist') {
return;
}
console.log(error);
await delay(1000);
return init(index);
}
}
}
async function init(limit: number): Promise<any> {
const { rows: idList } = await PgClient.query<{ id: string }>(
`SELECT id FROM ${PgDatasetTableName} WHERE data_id IS NULL`
);
console.log('totalCount', idList.length);
if (idList.length === 0) return;
for (let i = 0; i < limit; i++) {
initData(i);
}
async function initData(index: number): Promise<any> {
const dataId = idList[index]?.id;
if (!dataId) {
console.log('done');
return;
}
// get limit data where data_id is null
const { rows } = await PgClient.query<PgItemType>(
`SELECT id,q,a,dataset_id,collection_id,team_id,tmb_id FROM ${PgDatasetTableName} WHERE id=${dataId};`
);
const data = rows[0];
if (!data) {
console.log('done');
return;
}
let id = '';
try {
// create mongo data and update data_id
const { _id } = await MongoDatasetData.create({
teamId: data.team_id.trim(),
tmbId: data.tmb_id.trim(),
datasetId: data.dataset_id,
collectionId: data.collection_id,
q: data.q,
a: data.a,
indexes: [
{
defaultIndex: !data.a,
type: data.a ? DatasetDataIndexTypeEnum.qa : DatasetDataIndexTypeEnum.chunk,
dataId: data.id,
text: data.q
}
]
});
id = _id;
// update pg data_id
await PgClient.query(
`UPDATE ${PgDatasetTableName} SET data_id='${String(_id)}' WHERE id=${dataId};`
);
console.log(++success);
return initData(index + limit);
} catch (error) {
console.log(error);
console.log(data);
try {
if (id) {
await MongoDatasetData.findByIdAndDelete(id);
}
} catch (error) {}
await delay(500);
return initData(index);
}
}
}

View File

@@ -4,21 +4,14 @@ import { connectToDatabase } from '@/service/mongo';
import { MongoBill } from '@fastgpt/service/support/wallet/bill/schema';
import {
createDefaultTeam,
getTeamInfoByTmbId
getUserDefaultTeam
} from '@fastgpt/service/support/user/team/controller';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { UserModelSchema } from '@fastgpt/global/support/user/type';
import { delay } from '@/utils/tools';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import {
DatasetCollectionSchemaType,
DatasetSchemaType,
DatasetTrainingSchemaType
} from '@fastgpt/global/core/dataset/type';
import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { Types } from 'mongoose';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { PgClient } from '@fastgpt/service/common/pg';
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
@@ -30,6 +23,7 @@ import { MongoChatItem } from '@fastgpt/service/core/chat/chatItemSchema';
import { MongoPlugin } from '@fastgpt/service/core/plugin/schema';
import { POST } from '@fastgpt/service/common/api/plusRequest';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { getGFSCollection } from '@fastgpt/service/common/file/gridfs/controller';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
@@ -182,7 +176,7 @@ async function initMongoTeamId(limit: number) {
async function init(userId: string): Promise<any> {
try {
const tmb = await getTeamInfoByTmbId({ userId });
const tmb = await getUserDefaultTeam({ userId });
await schema.updateMany(
{
@@ -225,7 +219,7 @@ async function initDatasetAndApp() {
}
async function initCollectionFileTeam(limit: number) {
/* init user default Team */
const DatasetFile = connectionMongo.connection.db.collection(`dataset.files`);
const DatasetFile = getGFSCollection('dataset');
const matchWhere = {
$or: [{ 'metadata.teamId': { $exists: false } }, { 'metadata.teamId': null }]
};
@@ -264,7 +258,7 @@ async function initCollectionFileTeam(limit: number) {
async function init(userId: string): Promise<any> {
try {
const tmb = await getTeamInfoByTmbId({
const tmb = await getUserDefaultTeam({
userId
});
@@ -295,8 +289,8 @@ async function initPgData() {
// add column
try {
await Promise.all([
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ADD COLUMN team_id CHAR(50);`),
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ADD COLUMN tmb_id CHAR(50);`),
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ADD COLUMN team_id VARCHAR(50);`),
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ADD COLUMN tmb_id VARCHAR(50);`),
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN user_id DROP NOT NULL;`)
]);
} catch (error) {
@@ -316,7 +310,7 @@ async function initPgData() {
const userId = rows[index]?.user_id;
if (!userId) return;
try {
const tmb = await getTeamInfoByTmbId({ userId });
const tmb = await getUserDefaultTeam({ userId });
// update pg
await PgClient.query(
`Update ${PgDatasetTableName} set team_id = '${tmb.teamId}', tmb_id = '${tmb.tmbId}' where user_id = '${userId}' AND team_id IS NULL;`