Files
FastGPT/packages/service/core/dataset/websiteSync/index.ts
Archer d171b2d3d8 website sync feature (#4429)
* perf: introduce BullMQ for website sync (#4403)

* perf: introduce BullMQ for website sync

* feat: new redis module

* fix: remove graceful shutdown

* perf: improve UI in dataset detail

- Updated the "change" icon SVG file.
- Modified i18n strings.
- Added new i18n string "immediate_sync".
- Improved UI in dataset detail page, including button icons and
background colors.

* refactor: Add chunkSettings to DatasetSchema

* perf: website sync ux

* env template

* fix: clean up website dataset when updating chunk settings (#4420)

* perf: check setting updated

* perf: worker currency

* feat: init script for website sync refactor (#4425)

* website feature doc

---------

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
2025-04-07 09:55:11 +08:00

81 lines
2.3 KiB
TypeScript

import { Processor } from 'bullmq';
import { getQueue, getWorker, QueueNames } from '../../../common/bullmq';
import { DatasetStatusEnum } from '@fastgpt/global/core/dataset/constants';
export type WebsiteSyncJobData = {
datasetId: string;
};
export const websiteSyncQueue = getQueue<WebsiteSyncJobData>(QueueNames.websiteSync, {
defaultJobOptions: {
attempts: 3, // retry 3 times
backoff: {
type: 'exponential',
delay: 1000 // delay 1 second between retries
}
}
});
export const getWebsiteSyncWorker = (processor: Processor<WebsiteSyncJobData>) => {
return getWorker<WebsiteSyncJobData>(QueueNames.websiteSync, processor, {
removeOnFail: {
age: 15 * 24 * 60 * 60, // Keep up to 15 days
count: 1000 // Keep up to 1000 jobs
},
concurrency: 1 // Set worker to process only 1 job at a time
});
};
export const addWebsiteSyncJob = (data: WebsiteSyncJobData) => {
const datasetId = String(data.datasetId);
// deduplication: make sure only 1 job
return websiteSyncQueue.add(datasetId, data, { deduplication: { id: datasetId } });
};
export const getWebsiteSyncDatasetStatus = async (datasetId: string) => {
const jobId = await websiteSyncQueue.getDeduplicationJobId(datasetId);
if (!jobId) {
return DatasetStatusEnum.active;
}
const job = await websiteSyncQueue.getJob(jobId);
if (!job) {
return DatasetStatusEnum.active;
}
const jobState = await job.getState();
if (['waiting-children', 'waiting'].includes(jobState)) {
return DatasetStatusEnum.waiting;
}
if (jobState === 'active') {
return DatasetStatusEnum.syncing;
}
return DatasetStatusEnum.active;
};
// Scheduler setting
const repeatDuration = 24 * 60 * 60 * 1000; // every day
export const upsertWebsiteSyncJobScheduler = (data: WebsiteSyncJobData, startDate?: number) => {
const datasetId = String(data.datasetId);
return websiteSyncQueue.upsertJobScheduler(
datasetId,
{
every: repeatDuration,
startDate: startDate || new Date().getTime() + repeatDuration // First run tomorrow
},
{
name: datasetId,
data
}
);
};
export const getWebsiteSyncJobScheduler = (datasetId: string) => {
return websiteSyncQueue.getJobScheduler(String(datasetId));
};
export const removeWebsiteSyncJobScheduler = (datasetId: string) => {
return websiteSyncQueue.removeJobScheduler(String(datasetId));
};