Files
FastGPT/packages/service/core/dataset/collection/schema.ts
Archer 3a5d725efd feature: 4.10.1 (#5201)
* add dynamic inputRender (#5127)

* dynamic input component

* fix

* fix

* fix

* perf: dynamic render input

* update doc

* perf: error catch

* num input ui

* fix form render (#5177)

* perf: i18n check

* add log

* doc

* Sync dataset  (#5181)

* perf: api dataset create (#5047)

* Sync dataset (#5120)

* add

* wait

* restructure dataset sync, update types and APIs, add sync hints, and remove legacy logic

* feat: add function to retrieve real file ID from third-party doc library and rename team permission check function for clarity

* fix some console

* refactor: rename team dataset limit check functions for clarity, update API dataset sync limit usage, and rename root directory to "ROOT_FOLDER"

* feat: update sync dataset login

* fix delete.ts

* feat: update pnpm-lock.yaml to include bullmq, fix comments in api.d.ts and type.d.ts, rename API file ID field, optimize dataset sync logic, and add website sync feature with related APIs

* feat: update CollectionCard to support site dataset sync, add API root ID constant and init sync API

* feat: add RootCollectionId constant to replace hardcoded root ID

---------

Co-authored-by: dreamer6680 <146868355@qq.com>

* perf: code

* feat: update success message for dataset sync, revise related i18n texts, and optimize file selection logic (#5166)

Co-authored-by: dreamer6680 <146868355@qq.com>

* perf: select file

* Sync dataset (#5180)

* feat: update success message for dataset sync, revise related i18n texts, and optimize file selection logic

* fix: make listfile function return rawid string

---------

Co-authored-by: dreamer6680 <146868355@qq.com>

* init sh

* fix: ts

---------

Co-authored-by: dreamer6680 <1468683855@qq.com>
Co-authored-by: dreamer6680 <146868355@qq.com>

* update doc

* i18n

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: dreamer6680 <1468683855@qq.com>
Co-authored-by: dreamer6680 <146868355@qq.com>
2025-07-11 17:02:48 +08:00

138 lines
2.9 KiB
TypeScript

import { connectionMongo, getMongoModel } from '../../../common/mongo';
const { Schema } = connectionMongo;
import { type DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
import { DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constants';
import { ChunkSettings, DatasetCollectionName } from '../schema';
import {
TeamCollectionName,
TeamMemberCollectionName
} from '@fastgpt/global/support/user/team/constant';
/**
 * Name passed to `getMongoModel` for the dataset-collection model; it is also
 * the `ref` target of the self-referencing `parentId` field.
 */
export const DatasetColCollectionName = 'dataset_collections';

// Local shorthands so the field table below stays compact.
const ObjectIdType = Schema.Types.ObjectId;
const now = (): Date => new Date();

/**
 * Schema for one "collection" document inside a dataset.
 *
 * Fields are grouped as: ownership references, basic info, per-source
 * metadata, parse settings and, last, the shared `ChunkSettings` fields that
 * are spread in from `../schema`.
 */
const DatasetCollectionSchema = new Schema({
  // ----- Ownership / hierarchy references -----
  // Points at another document of this same model; unset parents default to null.
  parentId: { type: ObjectIdType, ref: DatasetColCollectionName, default: null },
  teamId: { type: ObjectIdType, ref: TeamCollectionName, required: true },
  tmbId: { type: ObjectIdType, ref: TeamMemberCollectionName, required: true },
  datasetId: { type: ObjectIdType, ref: DatasetCollectionName, required: true },

  // ----- Basic info -----
  // Must be one of the keys of DatasetCollectionTypeMap.
  type: {
    type: String,
    enum: Object.keys(DatasetCollectionTypeMap),
    required: true
  },
  name: { type: String, required: true },
  tags: { type: [String], default: [] },
  // Both timestamps default to a fresh Date produced when the default is applied.
  createTime: { type: Date, default: now },
  updateTime: { type: Date, default: now },

  // ----- Metadata (which fields are filled depends on the collection source) -----
  // Local file collection
  fileId: { type: ObjectIdType, ref: 'dataset.files' },
  // Web link collection
  rawLink: String,
  // API collection
  apiFileId: String,
  // External collection (marked as abandoned in the original source)
  externalFileId: String,
  // External import url
  externalFileUrl: String,

  rawTextLength: Number,
  hashRawText: String,
  // Free-form object; defaults to an empty object.
  metadata: { type: Object, default: {} },
  forbid: Boolean,

  // ----- Parse settings -----
  customPdfParse: Boolean,
  apiFileParentId: String,

  // ----- Chunk settings (shared definition; spread last, as in the original) -----
  ...ChunkSettings
});
// Virtual `dataset` field: matches this document's `datasetId` against `_id`
// in the model referenced by DatasetCollectionName and resolves to a single
// document rather than an array (`justOne`).
DatasetCollectionSchema.virtual('dataset', {
  justOne: true,
  ref: DatasetCollectionName,
  foreignField: '_id',
  localField: 'datasetId'
});
// Register every index on the schema in one chain. `index()` returns the
// schema, so the calls run in the same order as separate statements would.
// Any error thrown while registering is logged and not rethrown.
try {
  DatasetCollectionSchema
    // Auth file
    .index({ teamId: 1, fileId: 1 })
    // List collections; deep find collections
    .index({ teamId: 1, datasetId: 1, parentId: 1, updateTime: -1 })
    // Tag filter
    .index({ teamId: 1, datasetId: 1, tags: 1 })
    // Create time filter
    .index({ teamId: 1, datasetId: 1, createTime: 1 })
    // Get collection by external file id: unique per dataset, but only
    // enforced on documents where `externalFileId` exists.
    .index(
      { datasetId: 1, externalFileId: 1 },
      {
        unique: true,
        partialFilterExpression: { externalFileId: { $exists: true } }
      }
    )
    // Clear invalid image
    .index({ teamId: 1, 'metadata.relatedImgId': 1 });
} catch (err) {
  console.log(err);
}
/**
 * Model for dataset collections, obtained from the project's `getMongoModel`
 * helper using `DatasetColCollectionName` and `DatasetCollectionSchema`, and
 * typed with `DatasetCollectionSchemaType`.
 */
export const MongoDatasetCollection = getMongoModel<DatasetCollectionSchemaType>(DatasetColCollectionName, DatasetCollectionSchema);