feat: export all chunks in collection (#6163)

* feat: export all chunks in collection

* perf: export collection api

* doc

---------

Co-authored-by: archer <545436317@qq.com>
This commit is contained in:
Roy
2025-12-30 10:57:22 +08:00
committed by GitHub
parent baf1a07993
commit 5ff4cc11b0
17 changed files with 155 additions and 61 deletions

View File

@@ -9,6 +9,7 @@ description: 'FastGPT V4.14.5 更新说明'
1. 对话记录使用侧改成软删除,增加从日志管理里删除对话记录。
2. 更新Agent/工具时,会更新其上层所有目录的更新时间,以便其会排在列表前面。
3. 门户页支持配置单个应用的运行可见度。
4. 导出单个知识库集合分块接口。
## ⚙️ 优化

View File

@@ -120,7 +120,7 @@
"document/content/docs/upgrading/4-14/4142.mdx": "2025-11-18T19:27:14+08:00",
"document/content/docs/upgrading/4-14/4143.mdx": "2025-11-26T20:52:05+08:00",
"document/content/docs/upgrading/4-14/4144.mdx": "2025-12-16T14:56:04+08:00",
"document/content/docs/upgrading/4-14/4145.mdx": "2025-12-21T23:28:19+08:00",
"document/content/docs/upgrading/4-14/4145.mdx": "2025-12-24T14:28:42+08:00",
"document/content/docs/upgrading/4-8/40.mdx": "2025-08-02T19:38:37+08:00",
"document/content/docs/upgrading/4-8/41.mdx": "2025-08-02T19:38:37+08:00",
"document/content/docs/upgrading/4-8/42.mdx": "2025-08-02T19:38:37+08:00",

View File

@@ -1,4 +1,4 @@
import { OutLinkChatAuthSchema } from '../../../../support/permission/chat/type';
import { OutLinkChatAuthSchema } from '../../../../support/permission/chat';
import { ObjectIdSchema } from '../../../../common/type/mongo';
import z from 'zod';

View File

@@ -0,0 +1,38 @@
import { ObjectIdSchema } from '../../../../common/type/mongo';
import { OutLinkChatAuthSchema } from '../../../../support/permission/chat';
import z from 'zod';
/**
 * Schema 1: basic collection export.
 *
 * The body carries only the id of the collection to export; no chat context
 * fields are part of this variant.
 */
const BasicExportSchema = z
  .object({
    collectionId: ObjectIdSchema.describe('集合ID')
  })
  .meta({
    description: '通过身份鉴权导出集合',
    example: {
      // 24-character hex string so the documented example looks like a real ObjectId.
      // NOTE(review): confirm this matches what ObjectIdSchema actually validates.
      collectionId: '68ad85a7463006c963799a05'
    }
  });
/**
 * Schema 2: export a collection from inside a chat.
 *
 * Extends the outlink chat auth fields with the collection to export and the
 * chat coordinates (app, chat, chat item). `chatTime` is optional and is
 * coerced to a Date.
 *
 * NOTE(review): `chatId` is validated with ObjectIdSchema here — confirm that
 * chat ids are always ObjectIds, otherwise valid chats would be rejected.
 */
const ChatExportSchema = OutLinkChatAuthSchema.extend({
  collectionId: ObjectIdSchema.describe('集合ID'),
  appId: ObjectIdSchema.describe('应用ID'),
  chatId: ObjectIdSchema.describe('会话ID'),
  chatItemDataId: z.string().describe('对话ID'),
  chatTime: z.coerce.date().optional().describe('对话时间')
}).meta({
  description: '对话中导出集合,可通过 chatId 等身份信息',
  example: {
    // ObjectId-typed fields use 24-character hex examples so the documented
    // sample is consistent with the field types declared above.
    // NOTE(review): confirm this matches what ObjectIdSchema actually validates.
    collectionId: '68ad85a7463006c963799a05',
    appId: '68ad85a7463006c963799a06',
    chatId: '68ad85a7463006c963799a07',
    chatItemDataId: '1234567890',
    chatTime: '2025-12-30T00:00:00.000Z',
    shareId: '1234567890',
    outLinkUid: '1234567890'
  }
});
/**
 * Request body accepted by the collection export endpoint: either the chat
 * variant or the basic variant.
 *
 * Order matters. A union yields the result of the first option that parses
 * successfully, and a plain (non-strict) object schema accepts extra keys and
 * drops them from its output. With the basic schema first, every chat request
 * would match it and lose appId / chatId / chatItemDataId, so callers checking
 * for those keys on the parsed body would never see the chat variant. The chat
 * schema requires those fields, so bodies without them still fall through to
 * the basic schema.
 */
export const ExportCollectionBodySchema = z.union([ChatExportSchema, BasicExportSchema]);
export type ExportCollectionBodyType = z.infer<typeof ExportCollectionBodySchema>;

View File

@@ -0,0 +1,25 @@
import type { OpenAPIPath } from '../../../type';
import { TagsMap } from '../../../tag';
import { ExportCollectionBodySchema } from './api';
/**
 * OpenAPI path definitions for dataset collection endpoints.
 *
 * Currently documents a single operation: POST /core/dataset/collection/export,
 * whose JSON request body is described by ExportCollectionBodySchema.
 */
export const DatasetCollectionPath: OpenAPIPath = {
'/core/dataset/collection/export': {
post: {
// Summary/description text: "download all data chunks of the collection"
summary: '下载集合的所有数据块',
description: '下载集合的所有数据块',
tags: [TagsMap.datasetCollection],
requestBody: {
content: {
'application/json': {
schema: ExportCollectionBodySchema
}
}
},
responses: {
// Only a description is declared for the success response; no response body schema is given.
200: {
description: '成功导出并下载集合的所有数据块内容'
}
}
}
}
};

View File

@@ -0,0 +1 @@
import { z } from 'zod';

View File

@@ -0,0 +1,4 @@
import type { OpenAPIPath } from '../../../type';
import { TagsMap } from '../../../tag';
/** OpenAPI path definitions for dataset data endpoints. Currently empty: no paths are registered here. */
export const DatasetDataPath: OpenAPIPath = {};

View File

@@ -0,0 +1,8 @@
import type { OpenAPIPath } from '../../type';
import { DatasetDataPath } from './data';
import { DatasetCollectionPath } from './collection';
/**
 * All dataset-related OpenAPI paths, merged into one map.
 * Data paths are copied first, then collection paths, so a collection entry
 * wins if the same path key ever appears in both.
 */
export const DatasetPath: OpenAPIPath = Object.assign({}, DatasetDataPath, DatasetCollectionPath);

View File

@@ -4,6 +4,7 @@ import { TagsMap } from './tag';
import { PluginPath } from './core/plugin';
import { AppPath } from './core/app';
import { SupportPath } from './support';
import { DatasetPath } from './core/dataset';
export const openAPIDocument = createDocument({
openapi: '3.1.0',
@@ -15,6 +16,7 @@ export const openAPIDocument = createDocument({
paths: {
...AppPath,
...ChatPath,
...DatasetPath,
...PluginPath,
...SupportPath
},
@@ -28,6 +30,10 @@ export const openAPIDocument = createDocument({
name: '对话管理',
tags: [TagsMap.chatHistory, TagsMap.chatPage, TagsMap.chatFeedback, TagsMap.chatSetting]
},
{
name: '知识库',
tags: [TagsMap.datasetCollection]
},
{
name: '插件系统',
tags: [TagsMap.pluginToolTag, TagsMap.pluginTeam]

View File

@@ -10,6 +10,10 @@ export const TagsMap = {
chatSetting: '门户页配置',
chatFeedback: '对话反馈',
// Dataset
datasetCollection: '集合',
datasetData: '数据',
// Plugin
pluginToolTag: '工具标签',
pluginTeam: '团队插件管理',

View File

@@ -1,29 +0,0 @@
import { z } from 'zod';
/**
 * Auth fields a chat request may carry, as a union of two object shapes:
 * share-link credentials (shareId / outLinkUid) or team credentials
 * (teamId / teamToken). Every field is an optional string.
 *
 * NOTE(review): because all fields in both options are optional, the first
 * option may accept any object — confirm how the union treats a body that
 * only carries the team fields.
 */
export const OutLinkChatAuthSchema = z.union([
// Option 1: share-link auth
z
.object({
shareId: z.string().optional(),
outLinkUid: z.string().optional()
})
.meta({
description: '分享链接鉴权',
example: {
shareId: '1234567890',
outLinkUid: '1234567890'
}
}),
// Option 2: team auth
z
.object({
teamId: z.string().optional(),
teamToken: z.string().optional()
})
.meta({
description: '团队鉴权',
example: {
teamId: '1234567890',
teamToken: '1234567890'
}
})
]);
/** Static type inferred from OutLinkChatAuthSchema. */
export type OutLinkChatAuthType = z.infer<typeof OutLinkChatAuthSchema>;

View File

@@ -26,6 +26,8 @@
"close_auto_sync": "Are you sure you want to turn off automatic sync?",
"collection.Create update time": "Creation/Update Time",
"collection.Training type": "Training",
"collection.export_all_chunks": "Export chunks",
"collection.not_found": "Collection does not exist",
"collection.sync.submit": "The synchronization task has been submitted",
"collection.training_type": "Chunk type",
"collection_data_count": "Data amount",

View File

@@ -25,7 +25,9 @@
"chunk_trigger_tips": "当满足一定条件时才触发分块存储,否则会直接完整存储原文",
"close_auto_sync": "确认关闭自动同步功能?",
"collection.Create update time": "创建/更新时间",
"collection.not_found": "集合不存在",
"collection.Training type": "训练模式",
"collection.export_all_chunks": "导出分块",
"collection.sync.submit": "已提交同步任务",
"collection.training_type": "处理模式",
"collection_data_count": "数据量",

View File

@@ -26,6 +26,8 @@
"close_auto_sync": "確認關閉自動同步功能?",
"collection.Create update time": "建立/更新時間",
"collection.Training type": "分段模式",
"collection.export_all_chunks": "導出分塊",
"collection.not_found": "集合不存在",
"collection.sync.submit": "已提交同步任務",
"collection.training_type": "處理模式",
"collection_data_count": "資料量",

View File

@@ -38,6 +38,7 @@ import PopoverConfirm from '@fastgpt/web/components/common/MyPopover/PopoverConf
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import MyImage from '@fastgpt/web/components/common/Image/MyImage';
import dynamic from 'next/dynamic';
import { downloadFetch } from '@/web/common/system/utils';
const InsertImagesModal = dynamic(() => import('./data/InsertImageModal'), {
ssr: false
@@ -128,6 +129,21 @@ const DataCard = () => {
}
});
// Request hook that downloads all chunks of a collection as a CSV file
// through the collection export API.
const { runAsync: onExportAllChunks, loading: isExportChunksLoading } = useRequest2(
  async (collectionId: string) => {
    // Fall back to the collection id so the download is never named "undefined.csv"
    // when the collection details are not available.
    const filename = `${collection?.name ?? collectionId}.csv`;

    await downloadFetch({
      url: '/api/core/dataset/collection/export',
      filename,
      body: {
        collectionId
      }
    });
  },
  {
    manual: true
  }
);
return (
<MyBox py={[1, 0]} h={'100%'}>
<Flex flexDirection={'column'} h={'100%'}>
@@ -155,6 +171,19 @@ const DataCard = () => {
<TagsPopOver currentCollection={collection} />
)}
</Box>
<Button
  variant={'whitePrimary'}
  size={['sm', 'md']}
  isDisabled={!collection}
  isLoading={isExportChunksLoading}
  onClick={() => {
    // Runtime guard instead of a non-null assertion: skip the export when
    // the collection (and therefore its id) is not available.
    if (!collection?._id) return;
    onExportAllChunks(collection._id);
  }}
>
  {t('dataset:collection.export_all_chunks')}
</Button>
{datasetDetail.type !== 'websiteDataset' &&
!!collection?.chunkSize &&
collection.permission?.hasWritePer && (
@@ -382,6 +411,7 @@ const DataCard = () => {
</>
)}
</Flex>
{canWrite && (
<PopoverConfirm
Trigger={

View File

@@ -1,7 +1,6 @@
import { NextAPI } from '@/service/middleware/entry';
import { authChatCrud, authCollectionInChat } from '@/service/support/permission/auth/chat';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { type OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { useIPFrequencyLimit } from '@fastgpt/service/common/middle/reqFrequencyLimit';
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
@@ -13,41 +12,36 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/datas
import { type ApiRequestProps } from '@fastgpt/service/type/next';
import { type NextApiResponse } from 'next';
import { sanitizeCsvField } from '@fastgpt/service/common/file/csv';
import { replaceS3KeyToPreviewUrl } from '@fastgpt/service/core/dataset/utils';
import { addDays } from 'date-fns';
import { ExportCollectionBodySchema } from '@fastgpt/global/openapi/core/dataset/collection/api';
export type ExportCollectionBody = {
collectionId: string;
async function handler(req: ApiRequestProps, res: NextApiResponse) {
const parseBody = ExportCollectionBodySchema.parse(req.body);
const collectionId = parseBody.collectionId;
appId?: string;
chatId?: string;
chatItemDataId?: string;
chatTime: Date;
} & OutLinkChatAuthProps;
async function handler(req: ApiRequestProps<ExportCollectionBody, {}>, res: NextApiResponse) {
const {
collectionId,
appId,
chatId,
chatItemDataId,
shareId,
outLinkUid,
teamId,
teamToken,
collection,
teamId: userTeamId,
chatTime
} = req.body;
const { collection, teamId: userTeamId } = await (async () => {
if (!appId || !chatId || !chatItemDataId) {
return authDatasetCollection({
} = await (async () => {
if (!('chatItemDataId' in parseBody)) {
const result = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId: req.body.collectionId,
collectionId,
per: ReadPermissionVal
});
return {
...result,
chatTime: undefined
};
}
/*
const { appId, chatId, chatItemDataId, shareId, outLinkUid, teamId, teamToken, chatTime } =
parseBody;
/*
1. auth chat read permission
2. auth collection quote in chat
3. auth outlink open show quote
@@ -73,7 +67,8 @@ async function handler(req: ApiRequestProps<ExportCollectionBody, {}>, res: Next
return {
...authRes,
collection
collection,
chatTime
};
})();
@@ -107,11 +102,17 @@ async function handler(req: ApiRequestProps<ExportCollectionBody, {}>, res: Next
readStream: cursor
});
write(`\uFEFFindex,content`);
write(`\uFEFFq,a`);
cursor.on('data', (doc) => {
const sanitizedQ = sanitizeCsvField(doc.q || '');
const sanitizedA = sanitizeCsvField(doc.a || '');
const sanitizedQ = replaceS3KeyToPreviewUrl(
sanitizeCsvField(doc.q || ''),
addDays(new Date(), 90)
);
const sanitizedA = replaceS3KeyToPreviewUrl(
sanitizeCsvField(doc.a || ''),
addDays(new Date(), 90)
);
write(`\n${sanitizedQ},${sanitizedA}`);
});

View File

@@ -83,7 +83,6 @@ import type {
DatasetCreateWithFilesBody,
DatasetCreateWithFilesResponse
} from '@/pages/api/core/dataset/createWithFiles';
import type { PresignDatasetFileGetUrlParams } from '@fastgpt/global/core/dataset/v2/api';
/* ======================== dataset ======================= */
export const getDatasets = (data: GetDatasetListBody) =>