Files
FastGPT/packages/service/core/dataset/collection/schema.ts
Archer d171b2d3d8 website sync feature (#4429)
* perf: introduce BullMQ for website sync (#4403)

* perf: introduce BullMQ for website sync

* feat: new redis module

* fix: remove graceful shutdown

* perf: improve UI in dataset detail

- Updated the "change" icon SVG file.
- Modified i18n strings.
- Added new i18n string "immediate_sync".
- Improved UI in dataset detail page, including button icons and
background colors.

* refactor: Add chunkSettings to DatasetSchema

* perf: website sync ux

* env template

* fix: clean up website dataset when updating chunk settings (#4420)

* perf: check setting updated

* perf: worker concurrency

* feat: init script for website sync refactor (#4425)

* website feature doc

---------

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
2025-04-07 09:55:11 +08:00

143 lines
3.0 KiB
TypeScript

import { connectionMongo, getMongoModel } from '../../../common/mongo';
const { Schema } = connectionMongo;
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
import { DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constants';
import { ChunkSettings, DatasetCollectionName } from '../schema';
import {
TeamCollectionName,
TeamMemberCollectionName
} from '@fastgpt/global/support/user/team/constant';
/**
 * Name used to register the dataset-collection model (it is the first argument
 * passed to `getMongoModel` in this file) and the `ref` target of the schema's
 * self-referencing `parentId` field.
 */
export const DatasetColCollectionName = 'dataset_collections';
const { ObjectId } = Schema.Types;

// Function default shared by the timestamp fields, so the date is computed when
// the default is applied rather than once at module load.
const currentDate = () => new Date();

/**
 * Mongoose schema describing a single dataset collection document.
 *
 * Fields are grouped as: ownership/hierarchy references, basic info,
 * source-specific metadata, sync/parse settings, and the shared chunk
 * settings spread in from `ChunkSettings`.
 */
const DatasetCollectionSchema = new Schema({
  // Reference to another document of this same model; null by default.
  parentId: { type: ObjectId, ref: DatasetColCollectionName, default: null },
  // Required references to the owning team, team member and dataset.
  teamId: { type: ObjectId, ref: TeamCollectionName, required: true },
  tmbId: { type: ObjectId, ref: TeamMemberCollectionName, required: true },
  datasetId: { type: ObjectId, ref: DatasetCollectionName, required: true },

  // Basic info
  // `type` must be one of the keys of DatasetCollectionTypeMap.
  type: { type: String, enum: Object.keys(DatasetCollectionTypeMap), required: true },
  name: { type: String, required: true },
  tags: { type: [String], default: [] },
  createTime: { type: Date, default: currentDate },
  updateTime: { type: Date, default: currentDate },

  // Metadata
  // Local file collection
  fileId: { type: ObjectId, ref: 'dataset.files' },
  // Web link collection
  rawLink: String,
  // API collection
  apiFileId: String,
  // External collection (abandoned)
  externalFileId: String,
  // External import url
  externalFileUrl: String,

  rawTextLength: Number,
  hashRawText: String,
  metadata: { type: Object, default: {} },

  forbid: Boolean,

  // Next sync time
  nextSyncTime: Date,

  // Parse settings
  customPdfParse: Boolean,

  // Chunk settings (field definitions come from the parent dataset schema module)
  ...ChunkSettings
});
// Options for the `dataset` virtual: resolve this document's `datasetId`
// against `_id` on the model named by DatasetCollectionName, yielding a single
// document (`justOne`) rather than an array.
const datasetVirtualOptions = {
  ref: DatasetCollectionName,
  localField: 'datasetId',
  foreignField: '_id',
  justOne: true
};
DatasetCollectionSchema.virtual('dataset', datasetVirtualOptions);
// Index declarations. Any error raised while declaring them is logged and
// otherwise ignored, so module loading continues.
try {
  // Builds index options restricting the index to documents where `field` exists.
  const whenFieldExists = (field: string) => ({
    partialFilterExpression: { [field]: { $exists: true } }
  });

  // Auth file
  DatasetCollectionSchema.index({ teamId: 1, fileId: 1 });

  // List collections; deep find collections
  DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, parentId: 1, updateTime: -1 });

  // Tag filter
  DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, tags: 1 });

  // Create time filter
  DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, createTime: 1 });

  // Next sync time filter (partial: only documents that have `nextSyncTime`)
  DatasetCollectionSchema.index({ type: 1, nextSyncTime: -1 }, whenFieldExists('nextSyncTime'));

  // Get collection by external file id; unique per dataset among documents
  // that have `externalFileId`
  DatasetCollectionSchema.index(
    { datasetId: 1, externalFileId: 1 },
    { unique: true, ...whenFieldExists('externalFileId') }
  );
} catch (error) {
  console.log(error);
}
/**
 * Model for dataset collection documents, typed as `DatasetCollectionSchemaType`.
 * Obtained from the project's `getMongoModel` helper using
 * `DatasetColCollectionName` and `DatasetCollectionSchema`.
 * NOTE(review): how `getMongoModel` registers or reuses models is not visible
 * in this file — confirm in `common/mongo` before relying on it.
 */
export const MongoDatasetCollection = getMongoModel<DatasetCollectionSchemaType>(
DatasetColCollectionName,
DatasetCollectionSchema
);