Files
FastGPT/packages/service/core/dataset/datasetSync/index.ts
T
Archer 3a5d725efd feature: 4.10.1 (#5201)
* add dynamic inputRender (#5127)

* dynamic input component

* fix

* fix

* fix

* perf: dynamic render input

* update doc

* perf: error catch

* num input ui

* fix form render (#5177)

* perf: i18n check

* add log

* doc

* Sync dataset  (#5181)

* perf: api dataset create (#5047)

* Sync dataset (#5120)

* add

* wait

* restructure dataset sync, update types and APIs, add sync hints, and remove legacy logic

* feat: add function to retrieve real file ID from third-party doc library and rename team permission check function for clarity

* fix some console logs

* refactor: rename team dataset limit check functions for clarity, update API dataset sync limit usage, and rename root directory to "ROOT_FOLDER"

* feat: update sync dataset logic

* fix delete.ts

* feat: update pnpm-lock.yaml to include bullmq, fix comments in api.d.ts and type.d.ts, rename API file ID field, optimize dataset sync logic, and add website sync feature with related APIs

* feat: update CollectionCard to support site dataset sync, add API root ID constant and init sync API

* feat: add RootCollectionId constant to replace hardcoded root ID

---------

Co-authored-by: dreamer6680 <146868355@qq.com>

* perf: code

* feat: update success message for dataset sync, revise related i18n texts, and optimize file selection logic (#5166)

Co-authored-by: dreamer6680 <146868355@qq.com>

* perf: select file

* Sync dataset (#5180)

* feat: update success message for dataset sync, revise related i18n texts, and optimize file selection logic

* fix: make listfile function return rawid string

---------

Co-authored-by: dreamer6680 <146868355@qq.com>

* init sh

* fix: ts

---------

Co-authored-by: dreamer6680 <1468683855@qq.com>
Co-authored-by: dreamer6680 <146868355@qq.com>

* update doc

* i18n

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: dreamer6680 <1468683855@qq.com>
Co-authored-by: dreamer6680 <146868355@qq.com>
2025-07-11 17:02:48 +08:00

102 lines
2.7 KiB
TypeScript

import { type Processor } from 'bullmq';
import { getQueue, getWorker, QueueNames } from '../../../common/bullmq';
import { DatasetStatusEnum } from '@fastgpt/global/core/dataset/constants';
/** Payload carried by every dataset sync job on the queue. */
export type DatasetSyncJobData = {
  // Id of the dataset to synchronize; also used as the dedup/scheduler key below.
  datasetId: string;
};
/**
 * Queue holding dataset sync jobs.
 * Failed jobs are retried up to 3 times with exponential backoff
 * starting at a 1 second delay.
 */
export const datasetSyncQueue = getQueue<DatasetSyncJobData>(QueueNames.datasetSync, {
  defaultJobOptions: {
    attempts: 3, // retry 3 times
    backoff: {
      type: 'exponential',
      delay: 1000 // delay 1 second between retries
    }
  }
});
/**
 * Create a worker that consumes dataset sync jobs.
 *
 * Jobs are processed strictly one at a time, and failed jobs are
 * retained for at most 15 days or 1000 entries, whichever is hit first.
 */
export const getDatasetSyncWorker = (processor: Processor<DatasetSyncJobData>) => {
  const fifteenDaysInSeconds = 15 * 24 * 60 * 60;

  return getWorker<DatasetSyncJobData>(QueueNames.datasetSync, processor, {
    concurrency: 1, // process a single sync job at a time
    removeOnFail: {
      age: fifteenDaysInSeconds, // keep failed jobs up to 15 days
      count: 1000 // cap retained failed jobs at 1000
    }
  });
};
/**
 * Enqueue a sync job for a dataset.
 *
 * The dataset id doubles as the deduplication key, so at most one
 * pending job exists per dataset at any time.
 */
export const addDatasetSyncJob = (data: DatasetSyncJobData) => {
  const id = String(data.datasetId);
  return datasetSyncQueue.add(id, data, { deduplication: { id } });
};
/**
 * Resolve a dataset's sync status from the state of its queued job.
 *
 * Looks the job up via the deduplication id (set in addDatasetSyncJob).
 * - no job found            -> active (nothing in flight)
 * - failed / unknown state  -> error, with the job's failure reason
 * - waiting(-children)      -> waiting
 * - active                  -> syncing
 * - anything else           -> active
 */
export const getDatasetSyncDatasetStatus = async (datasetId: string) => {
  // Default result when no sync is in flight or the state is terminal-success.
  const idle = {
    status: DatasetStatusEnum.active,
    errorMsg: undefined
  };

  const jobId = await datasetSyncQueue.getDeduplicationJobId(datasetId);
  if (!jobId) return idle;

  const job = await datasetSyncQueue.getJob(jobId);
  if (!job) return idle;

  const jobState = await job.getState();

  if (jobState === 'failed' || jobState === 'unknown') {
    return {
      status: DatasetStatusEnum.error,
      errorMsg: job.failedReason
    };
  }

  if (jobState === 'waiting' || jobState === 'waiting-children') {
    return {
      status: DatasetStatusEnum.waiting,
      errorMsg: undefined
    };
  }

  if (jobState === 'active') {
    return {
      status: DatasetStatusEnum.syncing,
      errorMsg: undefined
    };
  }

  return idle;
};
// Scheduler setting
const repeatDuration = 24 * 60 * 60 * 1000; // repeat once every 24 hours

/**
 * Create or update the repeating sync scheduler for a dataset.
 *
 * @param data      Job payload; its datasetId is used as the scheduler key.
 * @param startDate Epoch-ms timestamp of the first run; defaults to 24h from now.
 */
export const upsertDatasetSyncJobScheduler = (data: DatasetSyncJobData, startDate?: number) => {
  const datasetId = String(data.datasetId);

  return datasetSyncQueue.upsertJobScheduler(
    datasetId,
    {
      every: repeatDuration,
      // `||` (not `??`) is deliberate: a falsy 0 also falls back to the default.
      startDate: startDate || Date.now() + repeatDuration // First run tomorrow
    },
    {
      name: datasetId,
      data
    }
  );
};
/** Fetch the repeating sync scheduler for a dataset, keyed by dataset id. */
export const getDatasetSyncJobScheduler = (datasetId: string) =>
  datasetSyncQueue.getJobScheduler(String(datasetId));
/** Remove the repeating sync scheduler for a dataset, keyed by dataset id. */
export const removeDatasetSyncJobScheduler = (datasetId: string) =>
  datasetSyncQueue.removeJobScheduler(String(datasetId));