Files
FastGPT/packages/service/core/dataset/migration/schema.ts
T
Archer 2ccb5b50c6 V4.14.4 features (#6036)
* feat: add query optimize and bill (#6021)

* add query optimize and bill

* perf: query extension

* fix: embe model

* remove log

* remove log

* fix: test

---------

Co-authored-by: xxyyh <2289112474@qq.com>
Co-authored-by: archer <545436317@qq.com>

* feat: notice (#6013)

* feat: record user's language

* feat: notice points/dataset indexes; support count limit; update docker-compose.yml

* fix: ts error

* feat: send auth code i18n

* chore: dataset notice limit

* chore: adjust

* fix: ts

* fix: countLimit race condition; i18n en-prefix locale fallback to en

---------

Co-authored-by: archer <545436317@qq.com>

* perf: comment

* perf: send inform code

* fix: type error (#6029)

* feat: add ip region for chat logs (#6010)

* feat: add ip region for chat logs

* refactor: use Geolite2.mmdb

* fix: export chat logs

* fix: return location directly

* test: add unit test

* perf: log show ip data

* adjust commercial plans (#6008)

* plan frontend

* plan limit

* coupon

* discount coupon

* fix

* type

* fix audit

* type

* plan name

* legacy plan

* track

* feat: add discount coupon

* fix

* fix discount coupon

* openapi

* type

* type

* env

* api type

* fix

* fix: simple agent plugin input & agent dashboard card (#6034)

* refactor: remove gridfs (#6031)

* fix: replace gridfs multer operations with s3 compatible ops

* wip: s3 features

* refactor: remove gridfs

* fix

* perf: mock test

* doc

* doc

* doc

* fix: test

* fix: s3

* fix: mock s3

* remove invalid config

* fix: init query extension

* initv4144 (#6037)

* chore: initv4144

* fix

* version

* fix: new plans (#6039)

* fix: new plans

* qr modal tip

* fix: buffer raw text filename (#6040)

* fix: initv4144 (#6041)

* fix: pay refresh (#6042)

* fix: migration shell

* rename collection

* clear timerlock

* clear timerlock

* perf: faq

* perf: bill schema

* fix: openapi

* doc

* fix: share var render

* feat: delete dataset queue

* plan usage display (#6043)

* plan usage display

* text

* fix

* fix: ts

* perf: remove invalid code

* perf: init shell

* doc

* perf: rename field

* perf: avatar presign

* init

* custom plan text (#6045)

* fix plans

* fix

* fixed

* computed

---------

Co-authored-by: archer <545436317@qq.com>

* init shell

* plan text & price page back button (#6046)

* init

* index

* delete dataset

* delete dataset

* perf: delete dataset

* init

---------

Co-authored-by: YeYuheng <57035043+YYH211@users.noreply.github.com>
Co-authored-by: xxyyh <2289112474@qq.com>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: Roy <whoeverimf5@gmail.com>
Co-authored-by: heheer <heheer@sealos.io>
2025-12-08 01:44:15 +08:00

256 lines
5.7 KiB
TypeScript

import { connectionMongo, getMongoModel } from '../../../common/mongo';
const { Schema } = connectionMongo;
// MongoDB collection that stores per-file audit logs for the GridFS -> S3 migration.
export const DatasetMigrationLogCollectionName = 'dataset_migration_logs';
// Shape of one migration-log document: tracks a single resource (collection file
// or dataset image) being moved from GridFS to S3, including retries, errors,
// verification and rollback state.
export type DatasetMigrationLogSchemaType = {
  _id: string;
  // Migration batch info
  batchId: string; // all records produced by one migration run share the same batchId
  migrationVersion: string; // e.g. 'v4.14.3'
  // Resource type and identity
  resourceType: 'collection' | 'dataset_image'; // supports migrating different kinds of files
  resourceId: string; // collection._id or image._id
  teamId: string;
  datasetId?: string; // collections have one; images may not
  // Storage info before and after the migration
  sourceStorage: {
    type: 'gridfs';
    fileId: string; // GridFS ObjectId (stored as string here)
    bucketName: string; // 'dataset' or 'chat'
    fileSize?: number; // file size in bytes
    checksum?: string; // MD5/SHA256
  };
  targetStorage?: {
    type: 's3';
    key: string; // S3 object key
    bucket?: string; // S3 bucket name
    fileSize?: number;
    checksum?: string;
  };
  // Migration status
  status: 'pending' | 'processing' | 'completed' | 'failed' | 'rollback' | 'verified';
  // Timestamps
  createdAt: Date; // record creation time
  startedAt?: Date; // when the migration started
  completedAt?: Date; // when it completed
  rolledBackAt?: Date; // when it was rolled back
  // Retry info
  attemptCount: number; // attempts made so far
  maxAttempts: number; // maximum number of retries
  lastAttemptAt?: Date; // time of the most recent attempt
  // Error info
  error?: {
    message: string;
    stack?: string;
    code?: string; // error code, for grouping/statistics
    phase: 'download' | 'upload' | 'verify' | 'update_db'; // stage at which the error occurred
  };
  // Verification info
  verified: boolean; // whether data consistency has been verified
  verifiedAt?: Date;
  // Operation log (records each detailed step)
  operations: Array<{
    action: string; // e.g. 'start_download', 'upload_to_s3', 'update_collection', 'rollback'
    timestamp: Date;
    success: boolean;
    duration?: number; // elapsed time in milliseconds
    details?: any; // free-form extra info; NOTE(review): consider `unknown` — would narrow the public contract, so left as-is
  }>;
  // Metadata (for debugging and auditing)
  metadata: {
    fileName?: string; // original file name
    fileType?: string; // file type
    originalUpdateTime?: Date; // the collection's original update time
    executorIp?: string; // IP of the server that ran the migration
    nodeEnv?: string; // 'production' or 'development'
  };
  // Rollback info
  rollbackInfo?: {
    reason: string; // why the migration was rolled back
    rolledBackBy?: string; // operator or automated system
    s3FileDeleted: boolean; // whether the uploaded S3 file has been deleted
    dbRestored: boolean; // whether the database record has been restored
  };
};
// Mongoose schema mirroring DatasetMigrationLogSchemaType.
// NOTE(review): resourceId/teamId/datasetId are ObjectId here but typed as string
// in the TS type — presumably stringified at the API boundary; verify callers.
const DatasetMigrationLogSchema = new Schema({
  // Batch info
  batchId: {
    type: String,
    required: true,
    index: true
  },
  migrationVersion: {
    type: String,
    required: true
  },
  // Resource type and identity
  resourceType: {
    type: String,
    enum: ['collection', 'dataset_image'],
    required: true
  },
  resourceId: {
    type: Schema.Types.ObjectId,
    required: true,
    index: true
  },
  teamId: {
    type: Schema.Types.ObjectId,
    required: true,
    index: true
  },
  datasetId: {
    type: Schema.Types.ObjectId,
    index: true
  },
  // Storage info. The nested `type: { type: String }` form is the standard
  // Mongoose workaround for a subdocument field literally named "type".
  sourceStorage: {
    type: {
      type: String,
      default: 'gridfs'
    },
    fileId: {
      type: String,
      required: true
    },
    bucketName: String,
    fileSize: Number,
    checksum: String
  },
  targetStorage: {
    type: {
      type: String,
      default: 's3'
    },
    key: String,
    bucket: String,
    fileSize: Number,
    checksum: String
  },
  // Status
  status: {
    type: String,
    enum: ['pending', 'processing', 'completed', 'failed', 'rollback', 'verified'],
    default: 'pending',
    required: true,
    index: true
  },
  // Timestamps
  createdAt: {
    type: Date,
    default: () => new Date(),
    index: true
  },
  startedAt: Date,
  completedAt: Date,
  rolledBackAt: Date,
  // Retry info
  attemptCount: {
    type: Number,
    default: 0
  },
  maxAttempts: {
    type: Number,
    default: 3
  },
  lastAttemptAt: Date,
  // Error info
  error: {
    message: String,
    stack: String,
    code: String,
    phase: {
      type: String,
      enum: ['download', 'upload', 'verify', 'update_db']
    }
  },
  // Verification info
  verified: {
    type: Boolean,
    default: false
  },
  verifiedAt: Date,
  // Operation log
  operations: [
    {
      action: String,
      timestamp: {
        type: Date,
        default: () => new Date()
      },
      success: Boolean,
      duration: Number,
      details: Schema.Types.Mixed
    }
  ],
  // Metadata
  metadata: {
    fileName: String,
    fileType: String,
    originalUpdateTime: Date,
    executorIp: String,
    nodeEnv: String
  },
  // Rollback info
  rollbackInfo: {
    reason: String,
    rolledBackBy: String,
    s3FileDeleted: Boolean,
    dbRestored: Boolean
  }
});
// Compound index registration, applied in order:
//  1. per-batch migration progress lookups
//  2. migration history of a single resource
//  3. failed migrations that are eligible for retry
//  4. per-team migration overview
//  5. uniqueness guarantee: one record per resource within a batch
try {
  const compoundIndexes: Array<[Record<string, 1>, { unique: boolean }?]> = [
    [{ batchId: 1, status: 1 }],
    [{ resourceType: 1, resourceId: 1 }],
    [{ status: 1, attemptCount: 1, lastAttemptAt: 1 }],
    [{ teamId: 1, status: 1 }],
    [{ batchId: 1, resourceType: 1, resourceId: 1 }, { unique: true }]
  ];
  for (const [fields, options] of compoundIndexes) {
    DatasetMigrationLogSchema.index(fields, options);
  }
} catch (error) {
  console.log(error);
}
// Model bound to the 'dataset_migration_logs' collection.
// NOTE(review): getMongoModel presumably registers (or reuses) the Mongoose model — confirm in common/mongo.
export const MongoDatasetMigrationLog = getMongoModel<DatasetMigrationLogSchemaType>(
  DatasetMigrationLogCollectionName,
  DatasetMigrationLogSchema
);