Mirror of https://github.com/labring/FastGPT.git, synced 2025-10-15 15:41:05 +00:00
feature: 4.10.1 (#5201)
* add dynamic inputRender (#5127)
  * dynamic input component
  * fix
  * fix
  * fix
  * perf: dynamic render input
  * update doc
  * perf: error catch
  * num input ui
* fix form render (#5177)
* perf: i18n check
* add log
* doc
* Sync dataset (#5181)
  * perf: api dataset create (#5047)
  * Sync dataset (#5120)
  * add
  * wait
  * restructure dataset sync, update types and APIs, add sync hints, and remove legacy logic
  * feat: add function to retrieve real file ID from third-party doc library and rename team permission check function for clarity
  * fix some console
  * refactor: rename team dataset limit check functions for clarity, update API dataset sync limit usage, and rename root directory to "ROOT_FOLDER"
  * feat: update sync dataset login
  * fix delete.ts
  * feat: update pnpm-lock.yaml to include bullmq, fix comments in api.d.ts and type.d.ts, rename API file ID field, optimize dataset sync logic, and add website sync feature with related APIs
  * feat: update CollectionCard to support site dataset sync, add API root ID constant and init sync API
  * feat: add RootCollectionId constant to replace hardcoded root ID
  ---------
  Co-authored-by: dreamer6680 <146868355@qq.com>
* perf: code
* feat: update success message for dataset sync, revise related i18n texts, and optimize file selection logic (#5166)
  Co-authored-by: dreamer6680 <146868355@qq.com>
* perf: select file
* Sync dataset (#5180)
  * feat: update success message for dataset sync, revise related i18n texts, and optimize file selection logic
  * fix: make listfile function return rawid string
  ---------
  Co-authored-by: dreamer6680 <146868355@qq.com>
* init sh
* fix: ts
---------
Co-authored-by: dreamer6680 <1468683855@qq.com>
Co-authored-by: dreamer6680 <146868355@qq.com>
* update doc
* i18n
---------
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: dreamer6680 <1468683855@qq.com>
Co-authored-by: dreamer6680 <146868355@qq.com>
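For orientation before the diffs: after this release all three doc-library adapters (generic API, Feishu, Yuque) expose the same surface, including the new getFileRawId helper and richer file items. The sketch below is illustrative only; the field and method names are taken from the diffs that follow, while the type names ApiDatasetFileItem and ApiDatasetAdapter are made up for this summary.

// Illustrative summary of the adapter surface after 4.10.1; not an export of the repo.
type ApiDatasetFileItem = {
  id: string; // provider-facing id (for Yuque this is a composite `${repoId}-${id}-${uuid}` string)
  rawId: string; // the "real" file id in the third-party doc library
  parentId: string | null;
  name: string;
  type: 'file' | 'folder';
  updateTime: Date;
  createTime: Date;
  hasChild?: boolean;
};

interface ApiDatasetAdapter {
  getFileContent: (params: { apiFileId: string }) => Promise<unknown>; // params are assumptions
  listFiles: (params: { parentId?: string | null }) => Promise<ApiDatasetFileItem[]>;
  getFilePreviewUrl: (params: { apiFileId: string }) => Promise<string>; // params are assumptions
  getFileDetail: (params: { apiFileId: string }) => Promise<ApiDatasetFileItem>;
  getFileRawId: (fileId: string) => string; // new in this release
}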
@@ -1,5 +1,4 @@
 import type {
-  APIFileListResponse,
   ApiFileReadContentResponse,
   APIFileReadResponse,
   ApiDatasetDetailResponse,
@@ -19,6 +18,16 @@ type ResponseDataType = {
   data: any;
 };

+type APIFileListResponse = {
+  id: string;
+  parentId: ParentIdType;
+  name: string;
+  type: 'file' | 'folder';
+  updateTime: Date;
+  createTime: Date;
+  hasChild?: boolean;
+};
+
 export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }) => {
   const instance = axios.create({
     baseURL: apiServer.baseUrl,
@@ -106,6 +115,7 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }

     const formattedFiles = files.map((file) => ({
       ...file,
+      rawId: file.id,
       hasChild: file.hasChild ?? file.type === 'folder'
     }));

@@ -201,18 +211,27 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
     if (fileData) {
       return {
         id: fileData.id,
+        rawId: apiFileId,
         name: fileData.name,
-        parentId: fileData.parentId === null ? '' : fileData.parentId
+        parentId: fileData.parentId === null ? '' : fileData.parentId,
+        type: fileData.type,
+        updateTime: fileData.updateTime,
+        createTime: fileData.createTime
       };
     }

     return Promise.reject('File not found');
   };

+  const getFileRawId = (fileId: string) => {
+    return fileId;
+  };
+
   return {
     getFileContent,
     listFiles,
     getFilePreviewUrl,
-    getFileDetail
+    getFileDetail,
+    getFileRawId
   };
 };

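A minimal usage sketch of the updated hook above. The call site is hypothetical and the listFiles argument shape is an assumption borrowed from the sibling Feishu/Yuque adapters; getFileRawId being the identity function comes straight from the hunk.

// Hypothetical call site for useApiDatasetRequest; only baseUrl is confirmed by the hunk above.
async function demoApiProvider(apiServer: APIFileServer) {
  const api = useApiDatasetRequest({ apiServer });

  // Assumed argument shape, mirroring the Feishu/Yuque listFiles({ parentId }) signature.
  const files = await api.listFiles({ parentId: null });

  for (const file of files) {
    // Each item now carries rawId, and hasChild falls back to (type === 'folder').
    console.log(file.rawId, file.hasChild);
  }

  // For this generic API provider, getFileRawId simply echoes the id back.
  const firstId = files[0]?.id ?? '';
  console.log(api.getFileRawId(firstId) === firstId); // true
}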
@@ -1,5 +1,5 @@
 import type {
-  APIFileItem,
+  APIFileItemType,
   ApiFileReadContentResponse,
   ApiDatasetDetailResponse,
   FeishuServer
@@ -104,7 +104,11 @@ export const useFeishuDatasetRequest = ({ feishuServer }: { feishuServer: Feishu
       .catch((err) => responseError(err));
   };

-  const listFiles = async ({ parentId }: { parentId?: ParentIdType }): Promise<APIFileItem[]> => {
+  const listFiles = async ({
+    parentId
+  }: {
+    parentId?: ParentIdType;
+  }): Promise<APIFileItemType[]> => {
     const fetchFiles = async (pageToken?: string): Promise<FeishuFileListResponse['files']> => {
       const data = await request<FeishuFileListResponse>(
         `/open-apis/drive/v1/files`,
@@ -130,6 +134,7 @@ export const useFeishuDatasetRequest = ({ feishuServer }: { feishuServer: Feishu
       .filter((file) => ['folder', 'docx'].includes(file.type))
       .map((file) => ({
         id: file.token,
+        rawId: file.token,
         parentId: file.parent_token,
         name: file.name,
         type: file.type === 'folder' ? ('folder' as const) : ('file' as const),
@@ -186,23 +191,33 @@ export const useFeishuDatasetRequest = ({ feishuServer }: { feishuServer: Feishu
   }: {
     apiFileId: string;
   }): Promise<ApiDatasetDetailResponse> => {
-    const { document } = await request<{ document: { title: string } }>(
+    const { document } = await request<{ document: { title: string; type: string } }>(
       `/open-apis/docx/v1/documents/${apiFileId}`,
       {},
       'GET'
     );

     return {
+      rawId: apiFileId,
       name: document?.title,
       parentId: null,
-      id: apiFileId
+      id: apiFileId,
+      type: document.type === 'folder' ? ('folder' as const) : ('file' as const),
+      hasChild: document.type === 'folder',
+      updateTime: new Date(),
+      createTime: new Date()
     };
   };

+  const getFileRawId = (fileId: string) => {
+    return fileId;
+  };
+
   return {
     getFileContent,
     listFiles,
     getFilePreviewUrl,
-    getFileDetail
+    getFileDetail,
+    getFileRawId
   };
 };

@@ -1,5 +1,5 @@
 import type {
-  APIFileItem,
+  APIFileItemType,
   ApiFileReadContentResponse,
   YuqueServer,
   ApiDatasetDetailResponse
@@ -106,7 +106,7 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
       if (yuqueServer.basePath) parentId = yuqueServer.basePath;
     }

-    let files: APIFileItem[] = [];
+    let files: APIFileItemType[] = [];

     if (!parentId) {
       const limit = 100;
@@ -133,7 +133,8 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ

       files = allData.map((item) => {
         return {
-          id: item.id,
+          id: String(item.id),
+          rawId: String(item.id),
           name: item.name,
           parentId: null,
           type: 'folder',
@@ -144,7 +145,8 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
         };
       });
     } else {
-      if (typeof parentId === 'number') {
+      const numParentId = Number(parentId);
+      if (!isNaN(numParentId)) {
         const data = await request<YuqueTocListResponse>(
           `/api/v2/repos/${parentId}/toc`,
           {},
@@ -155,6 +157,7 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
           .filter((item) => !item.parent_uuid && item.type !== 'LINK')
           .map((item) => ({
             id: `${parentId}-${item.id}-${item.uuid}`,
+            rawId: String(item.uuid),
             name: item.title,
             parentId: item.parent_uuid,
             type: item.type === 'TITLE' ? ('folder' as const) : ('file' as const),
@@ -167,11 +170,11 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
       } else {
         const [repoId, uuid, parentUuid] = parentId.split(/-(.*?)-(.*)/);
         const data = await request<YuqueTocListResponse>(`/api/v2/repos/${repoId}/toc`, {}, 'GET');

         return data
           .filter((item) => item.parent_uuid === parentUuid)
           .map((item) => ({
             id: `${repoId}-${item.id}-${item.uuid}`,
+            rawId: String(item.uuid),
             name: item.title,
             parentId: item.parent_uuid,
             type: item.type === 'TITLE' ? ('folder' as const) : ('file' as const),
@@ -207,6 +210,10 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
       'GET'
     );

+    if (!data.title) {
+      return Promise.reject('Cannot find the file');
+    }
+
     return {
       title: data.title,
       rawText: data.body
@@ -266,8 +273,13 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
       }
       return {
         id: file.id,
+        rawId: file.id,
         name: file.name,
-        parentId: null
+        parentId: null,
+        type: file.type === 'TITLE' ? ('folder' as const) : ('file' as const),
+        updateTime: file.updated_at,
+        createTime: file.created_at,
+        hasChild: true
       };
     } else {
       const [repoId, parentUuid, fileId] = apiFileId.split(/-(.*?)-(.*)/);
@@ -283,23 +295,43 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
       if (file.parent_uuid) {
         return {
           id: file.id,
+          rawId: file.id,
           name: file.title,
-          parentId: parentId
+          parentId: parentId,
+          type: file.type === 'TITLE' ? ('folder' as const) : ('file' as const),
+          updateTime: new Date(),
+          createTime: new Date(),
+          hasChild: !!file.child_uuid
         };
       } else {
         return {
           id: file.id,
+          rawId: file.id,
           name: file.title,
-          parentId: repoId
+          parentId: repoId,
+          type: file.type === 'TITLE' ? ('folder' as const) : ('file' as const),
+          updateTime: new Date(),
+          createTime: new Date(),
+          hasChild: !!file.child_uuid
         };
       }
     }
   };

+  const getFileRawId = (fileId: string) => {
+    const [repoId, parentUuid, fileUuid] = fileId.split(/-(.*?)-(.*)/);
+    if (fileUuid) {
+      return `${fileUuid}`;
+    } else {
+      return `${repoId}`;
+    }
+  };
+
   return {
     getFileContent,
     listFiles,
     getFilePreviewUrl,
-    getFileDetail
+    getFileDetail,
+    getFileRawId
   };
 };

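To make the Yuque id handling concrete, here is a small round-trip through the composite id and the split regex used above. The values are made up; the `${repoId}-${item.id}-${item.uuid}` format and the regex come from the hunks.

// Composite id as built in listFiles, split the way getFileRawId / getFileDetail do.
const compositeId = '123456-7890-AbCdEfGh';
const [repoId, parentUuid, fileUuid] = compositeId.split(/-(.*?)-(.*)/);
console.log(repoId, parentUuid, fileUuid); // '123456' '7890' 'AbCdEfGh'

// A bare repo id has no dashes, so the regex never matches and only the first slot is filled;
// getFileRawId then falls back to returning the repo id itself.
const bareId = '123456';
const [onlyRepo, , noUuid] = bareId.split(/-(.*?)-(.*)/);
console.log(noUuid === undefined ? onlyRepo : noUuid); // '123456'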
@@ -180,18 +180,6 @@ export const createCollectionAndInsertData = async ({

     hashRawText: rawText ? hashStr(rawText) : undefined,
     rawTextLength: rawText?.length,
-    nextSyncTime: (() => {
-      // ignore auto collections sync for website datasets
-      if (!dataset.autoSync && dataset.type === DatasetTypeEnum.websiteDataset) return undefined;
-      if (
-        [DatasetCollectionTypeEnum.link, DatasetCollectionTypeEnum.apiFile].includes(
-          formatCreateCollectionParams.type
-        )
-      ) {
-        return addDays(new Date(), 1);
-      }
-      return undefined;
-    })(),
     session
   });

@@ -285,7 +273,8 @@ export async function createOneCollection({ session, ...props }: CreateOneCollec
     rawLink,
     externalFileId,
     externalFileUrl,
-    apiFileId
+    apiFileId,
+    apiFileParentId
   } = props;

   const collectionTags = await createOrGetCollectionTags({
@@ -310,7 +299,8 @@ export async function createOneCollection({ session, ...props }: CreateOneCollec
         ...(rawLink ? { rawLink } : {}),
         ...(externalFileId ? { externalFileId } : {}),
         ...(externalFileUrl ? { externalFileUrl } : {}),
-        ...(apiFileId ? { apiFileId } : {})
+        ...(apiFileId ? { apiFileId } : {}),
+        ...(apiFileParentId ? { apiFileParentId } : {})
       }
     ],
     { session, ordered: true }

@@ -78,11 +78,10 @@ const DatasetCollectionSchema = new Schema({
   },

   forbid: Boolean,
-  // next sync time
-  nextSyncTime: Date,

   // Parse settings
   customPdfParse: Boolean,
+  apiFileParentId: String,

   // Chunk settings
   ...ChunkSettings
@@ -112,16 +111,6 @@ try {
   // create time filter
   DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, createTime: 1 });

-  // next sync time filter
-  DatasetCollectionSchema.index(
-    { type: 1, nextSyncTime: -1 },
-    {
-      partialFilterExpression: {
-        nextSyncTime: { $exists: true }
-      }
-    }
-  );
-
   // Get collection by external file id
   DatasetCollectionSchema.index(
     { datasetId: 1, externalFileId: 1 },

@@ -173,37 +173,39 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {

   // Check if the original text is the same: skip if same
   const hashRawText = hashStr(rawText);
-  if (collection.hashRawText && hashRawText === collection.hashRawText) {
-    return DatasetCollectionSyncResultEnum.sameRaw;
+  if (collection.hashRawText && hashRawText !== collection.hashRawText) {
+    await mongoSessionRun(async (session) => {
+      // Delete old collection
+      await delCollection({
+        collections: [collection],
+        delImg: false,
+        delFile: false,
+        session
+      });
+
+      // Create new collection
+      await createCollectionAndInsertData({
+        session,
+        dataset,
+        rawText: rawText,
+        createCollectionParams: {
+          ...collection,
+          name: title || collection.name,
+          updateTime: new Date(),
+          tags: await collectionTagsToTagLabel({
+            datasetId: collection.datasetId,
+            tags: collection.tags
+          })
+        }
+      });
+    });
+
+    return DatasetCollectionSyncResultEnum.success;
+  } else if (collection.name !== title) {
+    await MongoDatasetCollection.updateOne({ _id: collection._id }, { $set: { name: title } });
+    return DatasetCollectionSyncResultEnum.success;
   }

-  await mongoSessionRun(async (session) => {
-    // Delete old collection
-    await delCollection({
-      collections: [collection],
-      delImg: false,
-      delFile: false,
-      session
-    });
-
-    // Create new collection
-    await createCollectionAndInsertData({
-      session,
-      dataset,
-      rawText: rawText,
-      createCollectionParams: {
-        ...collection,
-        name: title || collection.name,
-        updateTime: new Date(),
-        tags: await collectionTagsToTagLabel({
-          datasetId: collection.datasetId,
-          tags: collection.tags
-        })
-      }
-    });
-  });
-
-  return DatasetCollectionSyncResultEnum.success;
+  return DatasetCollectionSyncResultEnum.sameRaw;
 };

 /*

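Condensed, the reworked sync decision above reads as follows. This is an illustrative reading of the hunk, not code from the repo; hashStr here is a stand-in for the project's own helper, and the real changed-content branch also runs delCollection plus createCollectionAndInsertData inside a mongo session.

import { createHash } from 'crypto';

// Stand-in for the repo's hashStr helper (assumption: any stable string hash works for the comparison).
const hashStr = (s: string) => createHash('sha256').update(s).digest('hex');

type SyncResult = 'success' | 'sameRaw';

function decideSync(
  collection: { hashRawText?: string; name: string },
  title: string,
  rawText: string
): SyncResult {
  const hashRawText = hashStr(rawText);

  // 1. Raw text changed: delete the old collection and re-create it from the new text.
  if (collection.hashRawText && hashRawText !== collection.hashRawText) {
    return 'success';
  }
  // 2. Text unchanged but the source title moved: just rename the collection in place.
  if (collection.name !== title) {
    return 'success';
  }
  // 3. Nothing changed.
  return 'sameRaw';
}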
@@ -2,11 +2,11 @@ import { type Processor } from 'bullmq';
 import { getQueue, getWorker, QueueNames } from '../../../common/bullmq';
 import { DatasetStatusEnum } from '@fastgpt/global/core/dataset/constants';

-export type WebsiteSyncJobData = {
+export type DatasetSyncJobData = {
   datasetId: string;
 };

-export const websiteSyncQueue = getQueue<WebsiteSyncJobData>(QueueNames.websiteSync, {
+export const datasetSyncQueue = getQueue<DatasetSyncJobData>(QueueNames.datasetSync, {
   defaultJobOptions: {
     attempts: 3, // retry 3 times
     backoff: {
@@ -15,8 +15,8 @@ export const websiteSyncQueue = getQueue<WebsiteSyncJobData>(QueueNames.websiteS
     }
   }
 });
-export const getWebsiteSyncWorker = (processor: Processor<WebsiteSyncJobData>) => {
-  return getWorker<WebsiteSyncJobData>(QueueNames.websiteSync, processor, {
+export const getDatasetSyncWorker = (processor: Processor<DatasetSyncJobData>) => {
+  return getWorker<DatasetSyncJobData>(QueueNames.datasetSync, processor, {
     removeOnFail: {
       age: 15 * 24 * 60 * 60, // Keep up to 15 days
       count: 1000 // Keep up to 1000 jobs
@@ -25,21 +25,21 @@ export const getWebsiteSyncWorker = (processor: Processor<WebsiteSyncJobData>) =
   });
 };

-export const addWebsiteSyncJob = (data: WebsiteSyncJobData) => {
+export const addDatasetSyncJob = (data: DatasetSyncJobData) => {
   const datasetId = String(data.datasetId);
   // deduplication: make sure only 1 job
-  return websiteSyncQueue.add(datasetId, data, { deduplication: { id: datasetId } });
+  return datasetSyncQueue.add(datasetId, data, { deduplication: { id: datasetId } });
 };

-export const getWebsiteSyncDatasetStatus = async (datasetId: string) => {
-  const jobId = await websiteSyncQueue.getDeduplicationJobId(datasetId);
+export const getDatasetSyncDatasetStatus = async (datasetId: string) => {
+  const jobId = await datasetSyncQueue.getDeduplicationJobId(datasetId);
   if (!jobId) {
     return {
       status: DatasetStatusEnum.active,
       errorMsg: undefined
     };
   }
-  const job = await websiteSyncQueue.getJob(jobId);
+  const job = await datasetSyncQueue.getJob(jobId);
   if (!job) {
     return {
       status: DatasetStatusEnum.active,
@@ -76,10 +76,10 @@ export const getWebsiteSyncDatasetStatus = async (datasetId: string) => {

 // Scheduler setting
 const repeatDuration = 24 * 60 * 60 * 1000; // every day
-export const upsertWebsiteSyncJobScheduler = (data: WebsiteSyncJobData, startDate?: number) => {
+export const upsertDatasetSyncJobScheduler = (data: DatasetSyncJobData, startDate?: number) => {
   const datasetId = String(data.datasetId);

-  return websiteSyncQueue.upsertJobScheduler(
+  return datasetSyncQueue.upsertJobScheduler(
     datasetId,
     {
       every: repeatDuration,
@@ -92,10 +92,10 @@ export const upsertWebsiteSyncJobScheduler = (data: WebsiteSyncJobData, startDat
   );
 };

-export const getWebsiteSyncJobScheduler = (datasetId: string) => {
-  return websiteSyncQueue.getJobScheduler(String(datasetId));
+export const getDatasetSyncJobScheduler = (datasetId: string) => {
+  return datasetSyncQueue.getJobScheduler(String(datasetId));
 };

-export const removeWebsiteSyncJobScheduler = (datasetId: string) => {
-  return websiteSyncQueue.removeJobScheduler(String(datasetId));
+export const removeDatasetSyncJobScheduler = (datasetId: string) => {
+  return datasetSyncQueue.removeJobScheduler(String(datasetId));
 };

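A small usage sketch of the renamed queue helpers. The exported names and signatures are the ones introduced above; the import path and the call site are hypothetical.

// Hypothetical call site; the module path is illustrative.
import {
  addDatasetSyncJob,
  upsertDatasetSyncJobScheduler,
  getDatasetSyncDatasetStatus
} from './datasetSync';

async function scheduleDatasetSync(datasetId: string) {
  // One-off sync; the deduplication id keeps at most one pending job per dataset.
  await addDatasetSyncJob({ datasetId });

  // Recurring sync, repeated every repeatDuration (24h) as configured above.
  await upsertDatasetSyncJobScheduler({ datasetId });

  // Status is derived from the deduplicated job; with no job it falls back to "active".
  return getDatasetSyncDatasetStatus(datasetId);
}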