feature: 4.10.1 (#5201)

* add dynamic inputRender (#5127)

* dynamic input component

* fix

* fix

* fix

* perf: dynamic render input

* update doc

* perf: error catch

* number input UI

* fix form render (#5177)

* perf: i18n check

* add log

* doc

* Sync dataset (#5181)

* perf: api dataset create (#5047)

* Sync dataset (#5120)

* add

* wait

* restructure dataset sync, update types and APIs, add sync hints, and remove legacy logic

* feat: add function to retrieve real file ID from third-party doc library and rename team permission check function for clarity

* fix some console logs

* refactor: rename team dataset limit check functions for clarity, update API dataset sync limit usage, and rename root directory to "ROOT_FOLDER"

* feat: update sync dataset logic

* fix delete.ts

* feat: update pnpm-lock.yaml to include bullmq, fix comments in api.d.ts and type.d.ts, rename API file ID field, optimize dataset sync logic, and add website sync feature with related APIs

* feat: update CollectionCard to support site dataset sync, add API root ID constant and init sync API

* feat: add RootCollectionId constant to replace hardcoded root ID

---------

Co-authored-by: dreamer6680 <146868355@qq.com>

* perf: code

* feat: update success message for dataset sync, revise related i18n texts, and optimize file selection logic (#5166)

Co-authored-by: dreamer6680 <146868355@qq.com>

* perf: select file

* Sync dataset (#5180)

* feat: update success message for dataset sync, revise related i18n texts, and optimize file selection logic

* fix: make listFiles function return rawId string

---------

Co-authored-by: dreamer6680 <146868355@qq.com>

* init sh

* fix: ts

---------

Co-authored-by: dreamer6680 <1468683855@qq.com>
Co-authored-by: dreamer6680 <146868355@qq.com>

* update doc

* i18n

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: dreamer6680 <1468683855@qq.com>
Co-authored-by: dreamer6680 <146868355@qq.com>
Author: Archer
Date: 2025-07-11 17:02:48 +08:00
Committed by: GitHub
Parent: 2865419952
Commit: 3a5d725efd
92 changed files with 2336 additions and 2235 deletions

View File

@@ -1,5 +1,4 @@
 import type {
-  APIFileListResponse,
   ApiFileReadContentResponse,
   APIFileReadResponse,
   ApiDatasetDetailResponse,
@@ -19,6 +18,16 @@ type ResponseDataType = {
   data: any;
 };
 
+type APIFileListResponse = {
+  id: string;
+  parentId: ParentIdType;
+  name: string;
+  type: 'file' | 'folder';
+  updateTime: Date;
+  createTime: Date;
+  hasChild?: boolean;
+};
+
 export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }) => {
   const instance = axios.create({
     baseURL: apiServer.baseUrl,
@@ -106,6 +115,7 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
     const formattedFiles = files.map((file) => ({
       ...file,
+      rawId: file.id,
       hasChild: file.hasChild ?? file.type === 'folder'
     }));
@@ -201,18 +211,27 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
     if (fileData) {
       return {
         id: fileData.id,
+        rawId: apiFileId,
         name: fileData.name,
-        parentId: fileData.parentId === null ? '' : fileData.parentId
+        parentId: fileData.parentId === null ? '' : fileData.parentId,
+        type: fileData.type,
+        updateTime: fileData.updateTime,
+        createTime: fileData.createTime
       };
     }
 
     return Promise.reject('File not found');
   };
 
+  const getFileRawId = (fileId: string) => {
+    return fileId;
+  };
+
   return {
     getFileContent,
     listFiles,
     getFilePreviewUrl,
-    getFileDetail
+    getFileDetail,
+    getFileRawId
   };
 };
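
Note: here getFileRawId is the identity mapping, since a generic API server already returns its own stable IDs; the Feishu and Yuque adapters below override it. A minimal consumer sketch, assuming listFiles takes { parentId } like the other adapters and that APIFileServer carries baseUrl plus an authorization value (the config below is made up):

// Hypothetical usage, not part of this PR: walk an API dataset tree
// using the normalized listing returned above.
const api = useApiDatasetRequest({
  apiServer: { baseUrl: 'https://example.com/v1', authorization: 'Bearer <token>' }
});

const walk = async (parentId?: string) => {
  const files = await api.listFiles({ parentId });
  for (const file of files) {
    // hasChild falls back to `type === 'folder'`, so servers that omit the
    // field still produce a descendable tree; rawId mirrors the provider ID.
    if (file.hasChild) await walk(file.id);
  }
};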

View File

@@ -1,5 +1,5 @@
 import type {
-  APIFileItem,
+  APIFileItemType,
   ApiFileReadContentResponse,
   ApiDatasetDetailResponse,
   FeishuServer
@@ -104,7 +104,11 @@ export const useFeishuDatasetRequest = ({ feishuServer }: { feishuServer: Feishu
       .catch((err) => responseError(err));
   };
 
-  const listFiles = async ({ parentId }: { parentId?: ParentIdType }): Promise<APIFileItem[]> => {
+  const listFiles = async ({
+    parentId
+  }: {
+    parentId?: ParentIdType;
+  }): Promise<APIFileItemType[]> => {
     const fetchFiles = async (pageToken?: string): Promise<FeishuFileListResponse['files']> => {
       const data = await request<FeishuFileListResponse>(
         `/open-apis/drive/v1/files`,
@@ -130,6 +134,7 @@ export const useFeishuDatasetRequest = ({ feishuServer }: { feishuServer: Feishu
       .filter((file) => ['folder', 'docx'].includes(file.type))
       .map((file) => ({
         id: file.token,
+        rawId: file.token,
         parentId: file.parent_token,
         name: file.name,
         type: file.type === 'folder' ? ('folder' as const) : ('file' as const),
@@ -186,23 +191,33 @@ export const useFeishuDatasetRequest = ({ feishuServer }: { feishuServer: Feishu
   }: {
     apiFileId: string;
   }): Promise<ApiDatasetDetailResponse> => {
-    const { document } = await request<{ document: { title: string } }>(
+    const { document } = await request<{ document: { title: string; type: string } }>(
       `/open-apis/docx/v1/documents/${apiFileId}`,
       {},
       'GET'
     );
 
     return {
+      rawId: apiFileId,
       name: document?.title,
       parentId: null,
-      id: apiFileId
+      id: apiFileId,
+      type: document.type === 'folder' ? ('folder' as const) : ('file' as const),
+      hasChild: document.type === 'folder',
+      updateTime: new Date(),
+      createTime: new Date()
    };
   };
 
+  const getFileRawId = (fileId: string) => {
+    return fileId;
+  };
+
   return {
     getFileContent,
     listFiles,
     getFilePreviewUrl,
-    getFileDetail
+    getFileDetail,
+    getFileRawId
   };
 };
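
The reworked listFiles still walks Feishu's paged drive listing with a page token cursor. A standalone sketch of that accumulate-until-done pattern, with simplified stand-in types rather than the real Feishu response shape:

// Generic cursor pagination (illustrative stand-in, not the Feishu API).
type Page<T> = { items: T[]; has_more: boolean; next_page_token?: string };

const fetchAll = async <T>(
  fetchPage: (pageToken?: string) => Promise<Page<T>>
): Promise<T[]> => {
  const all: T[] = [];
  let token: string | undefined;
  do {
    const page = await fetchPage(token); // request one page
    all.push(...page.items); // accumulate its items
    token = page.has_more ? page.next_page_token : undefined;
  } while (token);
  return all;
};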

View File

@@ -1,5 +1,5 @@
 import type {
-  APIFileItem,
+  APIFileItemType,
   ApiFileReadContentResponse,
   YuqueServer,
   ApiDatasetDetailResponse
@@ -106,7 +106,7 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
       if (yuqueServer.basePath) parentId = yuqueServer.basePath;
     }
 
-    let files: APIFileItem[] = [];
+    let files: APIFileItemType[] = [];
 
     if (!parentId) {
       const limit = 100;
@@ -133,7 +133,8 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
       files = allData.map((item) => {
         return {
-          id: item.id,
+          id: String(item.id),
+          rawId: String(item.id),
           name: item.name,
           parentId: null,
           type: 'folder',
@@ -144,7 +145,8 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
        };
       });
     } else {
-      if (typeof parentId === 'number') {
+      const numParentId = Number(parentId);
+      if (!isNaN(numParentId)) {
         const data = await request<YuqueTocListResponse>(
           `/api/v2/repos/${parentId}/toc`,
           {},
@@ -155,6 +157,7 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
           .filter((item) => !item.parent_uuid && item.type !== 'LINK')
           .map((item) => ({
             id: `${parentId}-${item.id}-${item.uuid}`,
+            rawId: String(item.uuid),
             name: item.title,
             parentId: item.parent_uuid,
             type: item.type === 'TITLE' ? ('folder' as const) : ('file' as const),
@@ -167,11 +170,11 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
       } else {
         const [repoId, uuid, parentUuid] = parentId.split(/-(.*?)-(.*)/);
         const data = await request<YuqueTocListResponse>(`/api/v2/repos/${repoId}/toc`, {}, 'GET');
 
         return data
           .filter((item) => item.parent_uuid === parentUuid)
           .map((item) => ({
             id: `${repoId}-${item.id}-${item.uuid}`,
+            rawId: String(item.uuid),
             name: item.title,
             parentId: item.parent_uuid,
             type: item.type === 'TITLE' ? ('folder' as const) : ('file' as const),
@@ -207,6 +210,10 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
       'GET'
     );
 
+    if (!data.title) {
+      return Promise.reject('Cannot find the file');
+    }
+
     return {
       title: data.title,
       rawText: data.body
@@ -266,8 +273,13 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
      }
 
      return {
        id: file.id,
+        rawId: file.id,
        name: file.name,
-        parentId: null
+        parentId: null,
+        type: file.type === 'TITLE' ? ('folder' as const) : ('file' as const),
+        updateTime: file.updated_at,
+        createTime: file.created_at,
+        hasChild: true
      };
    } else {
      const [repoId, parentUuid, fileId] = apiFileId.split(/-(.*?)-(.*)/);
@@ -283,23 +295,43 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
      if (file.parent_uuid) {
        return {
          id: file.id,
+          rawId: file.id,
          name: file.title,
-          parentId: parentId
+          parentId: parentId,
+          type: file.type === 'TITLE' ? ('folder' as const) : ('file' as const),
+          updateTime: new Date(),
+          createTime: new Date(),
+          hasChild: !!file.child_uuid
        };
      } else {
        return {
          id: file.id,
+          rawId: file.id,
          name: file.title,
-          parentId: repoId
+          parentId: repoId,
+          type: file.type === 'TITLE' ? ('folder' as const) : ('file' as const),
+          updateTime: new Date(),
+          createTime: new Date(),
+          hasChild: !!file.child_uuid
        };
      }
    }
  };
 
+  const getFileRawId = (fileId: string) => {
+    const [repoId, parentUuid, fileUuid] = fileId.split(/-(.*?)-(.*)/);
+    if (fileUuid) {
+      return `${fileUuid}`;
+    } else {
+      return `${repoId}`;
+    }
+  };
+
   return {
     getFileContent,
     listFiles,
     getFilePreviewUrl,
-    getFileDetail
+    getFileDetail,
+    getFileRawId
   };
 };
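
The composite-ID handling above leans on String.prototype.split with capture groups: the string splits at the first separator match, and both captured groups are spliced into the result array. A quick demo with a made-up ID of the `${repoId}-${id}-${uuid}` form:

// `split` with capturing groups returns the captures as array elements.
const [repoId, id, uuid] = 'myrepo-12345-abc-def'.split(/-(.*?)-(.*)/);
console.log(repoId, id, uuid); // 'myrepo' '12345' 'abc-def' (greedy tail keeps later dashes)

// A bare repo ID has no match, so the captures stay undefined and
// getFileRawId falls back to returning the repo ID itself.
const [bareRepo, missing] = 'myrepo'.split(/-(.*?)-(.*)/);
console.log(bareRepo, missing); // 'myrepo' undefined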

View File

@@ -180,18 +180,6 @@ export const createCollectionAndInsertData = async ({
     hashRawText: rawText ? hashStr(rawText) : undefined,
     rawTextLength: rawText?.length,
-    nextSyncTime: (() => {
-      // ignore auto collections sync for website datasets
-      if (!dataset.autoSync && dataset.type === DatasetTypeEnum.websiteDataset) return undefined;
-      if (
-        [DatasetCollectionTypeEnum.link, DatasetCollectionTypeEnum.apiFile].includes(
-          formatCreateCollectionParams.type
-        )
-      ) {
-        return addDays(new Date(), 1);
-      }
-      return undefined;
-    })(),
     session
   });
@@ -285,7 +273,8 @@ export async function createOneCollection({ session, ...props }: CreateOneCollec
     rawLink,
     externalFileId,
     externalFileUrl,
-    apiFileId
+    apiFileId,
+    apiFileParentId
   } = props;
 
   const collectionTags = await createOrGetCollectionTags({
@@ -310,7 +299,8 @@ export async function createOneCollection({ session, ...props }: CreateOneCollec
         ...(rawLink ? { rawLink } : {}),
         ...(externalFileId ? { externalFileId } : {}),
         ...(externalFileUrl ? { externalFileUrl } : {}),
-        ...(apiFileId ? { apiFileId } : {})
+        ...(apiFileId ? { apiFileId } : {}),
+        ...(apiFileParentId ? { apiFileParentId } : {})
       }
     ],
     { session, ordered: true }
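
The `...(x ? { x } : {})` spreads above keep optional fields out of the inserted document entirely instead of storing `undefined`. A self-contained illustration of the pattern:

// Conditional spread: absent fields never become keys at all.
const apiFileId: string | undefined = 'f_123';
const apiFileParentId: string | undefined = undefined;

const doc = {
  name: 'my-collection',
  ...(apiFileId ? { apiFileId } : {}),
  ...(apiFileParentId ? { apiFileParentId } : {})
};

console.log(doc); // { name: 'my-collection', apiFileId: 'f_123' }
console.log('apiFileParentId' in doc); // false — the key is simply omitted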

View File

@@ -78,11 +78,10 @@ const DatasetCollectionSchema = new Schema({
   },
   forbid: Boolean,
 
-  // next sync time
-  nextSyncTime: Date,
   // Parse settings
   customPdfParse: Boolean,
+  apiFileParentId: String,
 
   // Chunk settings
   ...ChunkSettings
@@ -112,16 +111,6 @@ try {
   // create time filter
   DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, createTime: 1 });
 
-  // next sync time filter
-  DatasetCollectionSchema.index(
-    { type: 1, nextSyncTime: -1 },
-    {
-      partialFilterExpression: {
-        nextSyncTime: { $exists: true }
-      }
-    }
-  );
-
   // Get collection by external file id
   DatasetCollectionSchema.index(
     { datasetId: 1, externalFileId: 1 },
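
With nextSyncTime and its partial index removed (time-based polling gives way to the BullMQ scheduler below), the new apiFileParentId field records which remote folder a collection was synced from. A hypothetical lookup using it, not code from this PR:

// Hypothetical helper: list the collections synced from one remote folder.
import { MongoDatasetCollection } from './schema'; // the model above; import path assumed

export const listSyncedChildren = (datasetId: string, folderRawId: string) =>
  MongoDatasetCollection.find(
    { datasetId, apiFileParentId: folderRawId },
    '_id name apiFileId'
  ).lean();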

View File

@@ -173,37 +173,39 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
   // Check if the original text is the same: skip if same
   const hashRawText = hashStr(rawText);
-  if (collection.hashRawText && hashRawText === collection.hashRawText) {
-    return DatasetCollectionSyncResultEnum.sameRaw;
+  if (collection.hashRawText && hashRawText !== collection.hashRawText) {
+    await mongoSessionRun(async (session) => {
+      // Delete old collection
+      await delCollection({
+        collections: [collection],
+        delImg: false,
+        delFile: false,
+        session
+      });
+
+      // Create new collection
+      await createCollectionAndInsertData({
+        session,
+        dataset,
+        rawText: rawText,
+        createCollectionParams: {
+          ...collection,
+          name: title || collection.name,
+          updateTime: new Date(),
+          tags: await collectionTagsToTagLabel({
+            datasetId: collection.datasetId,
+            tags: collection.tags
+          })
+        }
+      });
+    });
+
+    return DatasetCollectionSyncResultEnum.success;
+  } else if (collection.name !== title) {
+    await MongoDatasetCollection.updateOne({ _id: collection._id }, { $set: { name: title } });
+    return DatasetCollectionSyncResultEnum.success;
   }
 
-  await mongoSessionRun(async (session) => {
-    // Delete old collection
-    await delCollection({
-      collections: [collection],
-      delImg: false,
-      delFile: false,
-      session
-    });
-
-    // Create new collection
-    await createCollectionAndInsertData({
-      session,
-      dataset,
-      rawText: rawText,
-      createCollectionParams: {
-        ...collection,
-        name: title || collection.name,
-        updateTime: new Date(),
-        tags: await collectionTagsToTagLabel({
-          datasetId: collection.datasetId,
-          tags: collection.tags
-        })
-      }
-    });
-  });
-
-  return DatasetCollectionSyncResultEnum.success;
+  return DatasetCollectionSyncResultEnum.sameRaw;
 };
 
 /*

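The delete-then-recreate pair runs inside mongoSessionRun so both operations commit or roll back together. A minimal sketch of what such a helper typically looks like with plain Mongoose — an illustrative reimplementation, not FastGPT's actual mongoSessionRun:

import mongoose from 'mongoose';

// Run `fn` inside a transaction: commit on success, abort on error.
export const runInSession = async <T>(
  fn: (session: mongoose.ClientSession) => Promise<T>
): Promise<T> => {
  const session = await mongoose.startSession();
  try {
    let result!: T;
    await session.withTransaction(async () => {
      result = await fn(session);
    });
    return result;
  } finally {
    await session.endSession();
  }
};
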
View File

@@ -2,11 +2,11 @@ import { type Processor } from 'bullmq';
 import { getQueue, getWorker, QueueNames } from '../../../common/bullmq';
 import { DatasetStatusEnum } from '@fastgpt/global/core/dataset/constants';
 
-export type WebsiteSyncJobData = {
+export type DatasetSyncJobData = {
   datasetId: string;
 };
 
-export const websiteSyncQueue = getQueue<WebsiteSyncJobData>(QueueNames.websiteSync, {
+export const datasetSyncQueue = getQueue<DatasetSyncJobData>(QueueNames.datasetSync, {
   defaultJobOptions: {
     attempts: 3, // retry 3 times
     backoff: {
@@ -15,8 +15,8 @@ export const websiteSyncQueue = getQueue<WebsiteSyncJobData>(QueueNames.websiteS
     }
   }
 });
 
-export const getWebsiteSyncWorker = (processor: Processor<WebsiteSyncJobData>) => {
-  return getWorker<WebsiteSyncJobData>(QueueNames.websiteSync, processor, {
+export const getDatasetSyncWorker = (processor: Processor<DatasetSyncJobData>) => {
+  return getWorker<DatasetSyncJobData>(QueueNames.datasetSync, processor, {
     removeOnFail: {
       age: 15 * 24 * 60 * 60, // Keep up to 15 days
       count: 1000 // Keep up to 1000 jobs
@@ -25,21 +25,21 @@ export const getWebsiteSyncWorker = (processor: Processor<WebsiteSyncJobData>) =
   });
 };
 
-export const addWebsiteSyncJob = (data: WebsiteSyncJobData) => {
+export const addDatasetSyncJob = (data: DatasetSyncJobData) => {
   const datasetId = String(data.datasetId);
 
   // deduplication: make sure only 1 job
-  return websiteSyncQueue.add(datasetId, data, { deduplication: { id: datasetId } });
+  return datasetSyncQueue.add(datasetId, data, { deduplication: { id: datasetId } });
 };
 
-export const getWebsiteSyncDatasetStatus = async (datasetId: string) => {
-  const jobId = await websiteSyncQueue.getDeduplicationJobId(datasetId);
+export const getDatasetSyncDatasetStatus = async (datasetId: string) => {
+  const jobId = await datasetSyncQueue.getDeduplicationJobId(datasetId);
 
   if (!jobId) {
     return {
       status: DatasetStatusEnum.active,
       errorMsg: undefined
     };
   }
-  const job = await websiteSyncQueue.getJob(jobId);
+  const job = await datasetSyncQueue.getJob(jobId);
   if (!job) {
     return {
       status: DatasetStatusEnum.active,
@@ -76,10 +76,10 @@ export const getWebsiteSyncDatasetStatus = async (datasetId: string) => {
 // Scheduler setting
 const repeatDuration = 24 * 60 * 60 * 1000; // every day
 
-export const upsertWebsiteSyncJobScheduler = (data: WebsiteSyncJobData, startDate?: number) => {
+export const upsertDatasetSyncJobScheduler = (data: DatasetSyncJobData, startDate?: number) => {
   const datasetId = String(data.datasetId);
 
-  return websiteSyncQueue.upsertJobScheduler(
+  return datasetSyncQueue.upsertJobScheduler(
     datasetId,
     {
       every: repeatDuration,
@@ -92,10 +92,10 @@ export const upsertWebsiteSyncJobScheduler = (data: WebsiteSyncJobData, startDat
   );
 };
 
-export const getWebsiteSyncJobScheduler = (datasetId: string) => {
-  return websiteSyncQueue.getJobScheduler(String(datasetId));
+export const getDatasetSyncJobScheduler = (datasetId: string) => {
+  return datasetSyncQueue.getJobScheduler(String(datasetId));
 };
 
-export const removeWebsiteSyncJobScheduler = (datasetId: string) => {
-  return websiteSyncQueue.removeJobScheduler(String(datasetId));
+export const removeDatasetSyncJobScheduler = (datasetId: string) => {
+  return datasetSyncQueue.removeJobScheduler(String(datasetId));
 };
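
A sketch of how the renamed helpers fit together at enqueue time. addDatasetSyncJob and upsertDatasetSyncJobScheduler come from the file above; the caller and its import path are hypothetical:

import { addDatasetSyncJob, upsertDatasetSyncJobScheduler } from './datasetSync'; // path assumed

export const triggerDatasetSync = async (datasetId: string) => {
  // deduplication: { id: datasetId } inside addDatasetSyncJob means a second
  // call while a job is still queued or running is a no-op, not a duplicate.
  await addDatasetSyncJob({ datasetId });

  // Upserting the scheduler with the same id is idempotent, so this is safe
  // on every trigger; the next repeat lands one interval from startDate.
  await upsertDatasetSyncJobScheduler({ datasetId }, Date.now() + 24 * 60 * 60 * 1000);
};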