mirror of
https://github.com/labring/FastGPT.git
synced 2026-04-05 01:05:10 +08:00
feat: export all chunks in collection (#6163)
* feat: export all chunks in collection * perf: export collection api * doc --------- Co-authored-by: archer <545436317@qq.com>
This commit is contained in:
@@ -9,6 +9,7 @@ description: 'FastGPT V4.14.5 更新说明'
|
||||
1. 对话记录使用侧改成软删除,增加从日志管理里删除对话记录。
|
||||
2. 更新Agent/工具时,会更新其上层所有目录的更新时间,以便其会排在列表前面。
|
||||
3. 门户页支持配置单个应用的运行可见度。
|
||||
4. 新增导出单个知识库集合分块的接口。
|
||||
|
||||
## ⚙️ 优化
|
||||
|
||||
|
||||
@@ -120,7 +120,7 @@
|
||||
"document/content/docs/upgrading/4-14/4142.mdx": "2025-11-18T19:27:14+08:00",
|
||||
"document/content/docs/upgrading/4-14/4143.mdx": "2025-11-26T20:52:05+08:00",
|
||||
"document/content/docs/upgrading/4-14/4144.mdx": "2025-12-16T14:56:04+08:00",
|
||||
"document/content/docs/upgrading/4-14/4145.mdx": "2025-12-21T23:28:19+08:00",
|
||||
"document/content/docs/upgrading/4-14/4145.mdx": "2025-12-24T14:28:42+08:00",
|
||||
"document/content/docs/upgrading/4-8/40.mdx": "2025-08-02T19:38:37+08:00",
|
||||
"document/content/docs/upgrading/4-8/41.mdx": "2025-08-02T19:38:37+08:00",
|
||||
"document/content/docs/upgrading/4-8/42.mdx": "2025-08-02T19:38:37+08:00",
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { OutLinkChatAuthSchema } from '../../../../support/permission/chat/type';
|
||||
import { OutLinkChatAuthSchema } from '../../../../support/permission/chat';
|
||||
import { ObjectIdSchema } from '../../../../common/type/mongo';
|
||||
import z from 'zod';
|
||||
|
||||
|
||||
38
packages/global/openapi/core/dataset/collection/api.ts
Normal file
38
packages/global/openapi/core/dataset/collection/api.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { ObjectIdSchema } from '../../../../common/type/mongo';
|
||||
import { OutLinkChatAuthSchema } from '../../../../support/permission/chat';
|
||||
import z from 'zod';
|
||||
|
||||
// Schema 1: basic collection export, authorised by the caller's own identity.
// The body carries a single field, `collectionId`. The meta description below reads
// "export a collection via identity authentication".
// NOTE(review): the example id '1234567890' is only 10 characters long. Confirm that
// ObjectIdSchema accepts it; otherwise the published OpenAPI example will not validate.
|
||||
const BasicExportSchema = z
|
||||
.object({
|
||||
collectionId: ObjectIdSchema.describe('集合ID')
|
||||
})
|
||||
.meta({
|
||||
description: '通过身份鉴权导出集合',
|
||||
example: {
|
||||
collectionId: '1234567890'
|
||||
}
|
||||
});
|
||||
|
||||
// Schema 2: export requested from inside a chat, authorised through the chat context.
// Adds the collection id plus the chat coordinates (appId, chatId, chatItemDataId and an
// optional chatTime, coerced to a Date) on top of the fields of OutLinkChatAuthSchema.
// NOTE(review): `.extend` is only available on object schemas. Presumably the schema
// imported from support/permission/chat is a z.object — verify.
// NOTE(review): chatId is validated with ObjectIdSchema. Confirm that chat ids really are
// ObjectIds, and that the 10-character example values below satisfy that schema.
|
||||
const ChatExportSchema = OutLinkChatAuthSchema.extend({
|
||||
collectionId: ObjectIdSchema.describe('集合ID'),
|
||||
appId: ObjectIdSchema.describe('应用ID'),
|
||||
chatId: ObjectIdSchema.describe('会话ID'),
|
||||
chatItemDataId: z.string().describe('对话ID'),
|
||||
chatTime: z.coerce.date().optional().describe('对话时间')
|
||||
}).meta({
|
||||
description: '对话中导出集合,可通过 chatId 等身份信息',
|
||||
example: {
|
||||
collectionId: '1234567890',
|
||||
appId: '1234567890',
|
||||
chatId: '1234567890',
|
||||
chatItemDataId: '1234567890',
|
||||
chatTime: '2025-12-30T00:00:00.000Z',
|
||||
shareId: '1234567890',
|
||||
outLinkUid: '1234567890'
|
||||
}
|
||||
});
|
||||
|
||||
// Request body accepted by the collection export endpoint: either the chat-context
// variant or the basic (identity-authenticated) variant.
//
// Order matters. zod tries union members in sequence and returns the first one that
// parses, and a plain z.object() drops keys it does not declare. The chat variant must
// therefore come first: with the basic variant in front, every chat-context body (which
// also carries a valid collectionId) would match it and be reduced to { collectionId },
// so the parsed value could never contain `chatItemDataId`.
// A body holding only `collectionId` fails the chat variant (missing appId/chatId/
// chatItemDataId) and falls through to the basic variant as before.
export const ExportCollectionBodySchema = z.union([ChatExportSchema, BasicExportSchema]);
|
||||
// Static type of a successfully parsed export request body.
export type ExportCollectionBodyType = z.infer<typeof ExportCollectionBodySchema>;
|
||||
25
packages/global/openapi/core/dataset/collection/index.ts
Normal file
25
packages/global/openapi/core/dataset/collection/index.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
import type { OpenAPIPath } from '../../../type';
|
||||
import { TagsMap } from '../../../tag';
|
||||
import { ExportCollectionBodySchema } from './api';
|
||||
|
||||
// OpenAPI path definitions for dataset-collection endpoints.
// Currently registers a single operation: POST /core/dataset/collection/export, whose
// summary reads "download all data chunks of a collection". It takes a JSON body
// described by ExportCollectionBodySchema and documents only a 200 response; no response
// schema is declared.
export const DatasetCollectionPath: OpenAPIPath = {
|
||||
'/core/dataset/collection/export': {
|
||||
post: {
|
||||
summary: '下载集合的所有数据块',
|
||||
description: '下载集合的所有数据块',
|
||||
tags: [TagsMap.datasetCollection],
|
||||
requestBody: {
|
||||
content: {
|
||||
'application/json': {
|
||||
schema: ExportCollectionBodySchema
|
||||
}
|
||||
}
|
||||
},
|
||||
responses: {
|
||||
200: {
|
||||
description: '成功导出并下载集合的所有数据块内容'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
1
packages/global/openapi/core/dataset/data/api.ts
Normal file
1
packages/global/openapi/core/dataset/data/api.ts
Normal file
@@ -0,0 +1 @@
|
||||
import { z } from 'zod';
|
||||
4
packages/global/openapi/core/dataset/data/index.ts
Normal file
4
packages/global/openapi/core/dataset/data/index.ts
Normal file
@@ -0,0 +1,4 @@
|
||||
import type { OpenAPIPath } from '../../../type';
|
||||
import { TagsMap } from '../../../tag';
|
||||
|
||||
// OpenAPI path definitions for dataset-data endpoints. Empty for now: no operations are
// registered here yet.
export const DatasetDataPath: OpenAPIPath = {};
|
||||
8
packages/global/openapi/core/dataset/index.ts
Normal file
8
packages/global/openapi/core/dataset/index.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import type { OpenAPIPath } from '../../type';
|
||||
import { DatasetDataPath } from './data';
|
||||
import { DatasetCollectionPath } from './collection';
|
||||
|
||||
// Aggregated OpenAPI paths for the dataset module: the dataset-data paths merged with
// the dataset-collection paths. Later spreads win if two maps ever declare the same key.
export const DatasetPath: OpenAPIPath = {
|
||||
...DatasetDataPath,
|
||||
...DatasetCollectionPath
|
||||
};
|
||||
@@ -4,6 +4,7 @@ import { TagsMap } from './tag';
|
||||
import { PluginPath } from './core/plugin';
|
||||
import { AppPath } from './core/app';
|
||||
import { SupportPath } from './support';
|
||||
import { DatasetPath } from './core/dataset';
|
||||
|
||||
export const openAPIDocument = createDocument({
|
||||
openapi: '3.1.0',
|
||||
@@ -15,6 +16,7 @@ export const openAPIDocument = createDocument({
|
||||
paths: {
|
||||
...AppPath,
|
||||
...ChatPath,
|
||||
...DatasetPath,
|
||||
...PluginPath,
|
||||
...SupportPath
|
||||
},
|
||||
@@ -28,6 +30,10 @@ export const openAPIDocument = createDocument({
|
||||
name: '对话管理',
|
||||
tags: [TagsMap.chatHistory, TagsMap.chatPage, TagsMap.chatFeedback, TagsMap.chatSetting]
|
||||
},
|
||||
{
|
||||
name: '知识库',
|
||||
tags: [TagsMap.datasetCollection]
|
||||
},
|
||||
{
|
||||
name: '插件系统',
|
||||
tags: [TagsMap.pluginToolTag, TagsMap.pluginTeam]
|
||||
|
||||
@@ -10,6 +10,10 @@ export const TagsMap = {
|
||||
chatSetting: '门户页配置',
|
||||
chatFeedback: '对话反馈',
|
||||
|
||||
// Dataset
|
||||
datasetCollection: '集合',
|
||||
datasetData: '数据',
|
||||
|
||||
// Plugin
|
||||
pluginToolTag: '工具标签',
|
||||
pluginTeam: '团队插件管理',
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
// Credentials accepted for out-link chat access, modelled as a union of two shapes:
//   - share-link auth: shareId + outLinkUid
//   - team auth:       teamId + teamToken
// Every field is optional, so an empty object already satisfies the first variant.
// NOTE(review): being a z.union, this schema has no `.extend`; callers that need to add
// fields require an object schema instead.
export const OutLinkChatAuthSchema = z.union([
|
||||
z
|
||||
.object({
|
||||
shareId: z.string().optional(),
|
||||
outLinkUid: z.string().optional()
|
||||
})
|
||||
.meta({
|
||||
description: '分享链接鉴权',
|
||||
example: {
|
||||
shareId: '1234567890',
|
||||
outLinkUid: '1234567890'
|
||||
}
|
||||
}),
|
||||
z
|
||||
.object({
|
||||
teamId: z.string().optional(),
|
||||
teamToken: z.string().optional()
|
||||
})
|
||||
.meta({
|
||||
description: '团队鉴权',
|
||||
example: {
|
||||
teamId: '1234567890',
|
||||
teamToken: '1234567890'
|
||||
}
|
||||
})
|
||||
]);
|
||||
// Static type inferred from the schema above.
export type OutLinkChatAuthType = z.infer<typeof OutLinkChatAuthSchema>;
|
||||
@@ -26,6 +26,8 @@
|
||||
"close_auto_sync": "Are you sure you want to turn off automatic sync?",
|
||||
"collection.Create update time": "Creation/Update Time",
|
||||
"collection.Training type": "Training",
|
||||
"collection.export_all_chunks": "Export chunks",
|
||||
"collection.not_found": "Collection does not exist",
|
||||
"collection.sync.submit": "The synchronization task has been submitted",
|
||||
"collection.training_type": "Chunk type",
|
||||
"collection_data_count": "Data amount",
|
||||
|
||||
@@ -25,7 +25,9 @@
|
||||
"chunk_trigger_tips": "当满足一定条件时才触发分块存储,否则会直接完整存储原文",
|
||||
"close_auto_sync": "确认关闭自动同步功能?",
|
||||
"collection.Create update time": "创建/更新时间",
|
||||
"collection.not_found": "集合不存在",
|
||||
"collection.Training type": "训练模式",
|
||||
"collection.export_all_chunks": "导出分块",
|
||||
"collection.sync.submit": "已提交同步任务",
|
||||
"collection.training_type": "处理模式",
|
||||
"collection_data_count": "数据量",
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
"close_auto_sync": "確認關閉自動同步功能?",
|
||||
"collection.Create update time": "建立/更新時間",
|
||||
"collection.Training type": "分段模式",
|
||||
"collection.export_all_chunks": "匯出分塊",
|
||||
"collection.not_found": "集合不存在",
|
||||
"collection.sync.submit": "已提交同步任務",
|
||||
"collection.training_type": "處理模式",
|
||||
"collection_data_count": "資料量",
|
||||
|
||||
@@ -38,6 +38,7 @@ import PopoverConfirm from '@fastgpt/web/components/common/MyPopover/PopoverConf
|
||||
import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import MyImage from '@fastgpt/web/components/common/Image/MyImage';
|
||||
import dynamic from 'next/dynamic';
|
||||
import { downloadFetch } from '@/web/common/system/utils';
|
||||
|
||||
const InsertImagesModal = dynamic(() => import('./data/InsertImageModal'), {
|
||||
ssr: false
|
||||
@@ -128,6 +129,21 @@ const DataCard = () => {
|
||||
}
|
||||
});
|
||||
|
||||
// Export action for the current collection: calls downloadFetch against the collection
// export endpoint with the given collectionId, saving the result under
// "<collection name>.csv". `isExportChunksLoading` is the request's loading flag.
// `manual: true` — presumably the request only fires when onExportAllChunks is invoked;
// confirm against useRequest2.
// NOTE(review): the filename is built from `collection?.name`, so it would become
// "undefined.csv" if this ran before the collection was loaded.
const { runAsync: onExportAllChunks, loading: isExportChunksLoading } = useRequest2(
|
||||
async (collectionId: string) => {
|
||||
await downloadFetch({
|
||||
url: '/api/core/dataset/collection/export',
|
||||
filename: `${collection?.name}.csv`,
|
||||
body: {
|
||||
collectionId
|
||||
}
|
||||
});
|
||||
},
|
||||
{
|
||||
manual: true
|
||||
}
|
||||
);
|
||||
|
||||
return (
|
||||
<MyBox py={[1, 0]} h={'100%'}>
|
||||
<Flex flexDirection={'column'} h={'100%'}>
|
||||
@@ -155,6 +171,19 @@ const DataCard = () => {
|
||||
<TagsPopOver currentCollection={collection} />
|
||||
)}
|
||||
</Box>
|
||||
|
||||
<Button
|
||||
variant={'whitePrimary'}
|
||||
size={['sm', 'md']}
|
||||
isDisabled={!collection}
|
||||
isLoading={isExportChunksLoading}
|
||||
onClick={() => {
|
||||
onExportAllChunks(collection?._id!);
|
||||
}}
|
||||
>
|
||||
{t('dataset:collection.export_all_chunks')}
|
||||
</Button>
|
||||
|
||||
{datasetDetail.type !== 'websiteDataset' &&
|
||||
!!collection?.chunkSize &&
|
||||
collection.permission?.hasWritePer && (
|
||||
@@ -382,6 +411,7 @@ const DataCard = () => {
|
||||
</>
|
||||
)}
|
||||
</Flex>
|
||||
|
||||
{canWrite && (
|
||||
<PopoverConfirm
|
||||
Trigger={
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { authChatCrud, authCollectionInChat } from '@/service/support/permission/auth/chat';
|
||||
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
|
||||
import { type OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
|
||||
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
|
||||
import { useIPFrequencyLimit } from '@fastgpt/service/common/middle/reqFrequencyLimit';
|
||||
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
|
||||
@@ -13,41 +12,36 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/datas
|
||||
import { type ApiRequestProps } from '@fastgpt/service/type/next';
|
||||
import { type NextApiResponse } from 'next';
|
||||
import { sanitizeCsvField } from '@fastgpt/service/common/file/csv';
|
||||
import { replaceS3KeyToPreviewUrl } from '@fastgpt/service/core/dataset/utils';
|
||||
import { addDays } from 'date-fns';
|
||||
import { ExportCollectionBodySchema } from '@fastgpt/global/openapi/core/dataset/collection/api';
|
||||
|
||||
export type ExportCollectionBody = {
|
||||
collectionId: string;
|
||||
async function handler(req: ApiRequestProps, res: NextApiResponse) {
|
||||
const parseBody = ExportCollectionBodySchema.parse(req.body);
|
||||
const collectionId = parseBody.collectionId;
|
||||
|
||||
appId?: string;
|
||||
chatId?: string;
|
||||
chatItemDataId?: string;
|
||||
chatTime: Date;
|
||||
} & OutLinkChatAuthProps;
|
||||
|
||||
async function handler(req: ApiRequestProps<ExportCollectionBody, {}>, res: NextApiResponse) {
|
||||
const {
|
||||
collectionId,
|
||||
appId,
|
||||
chatId,
|
||||
chatItemDataId,
|
||||
shareId,
|
||||
outLinkUid,
|
||||
teamId,
|
||||
teamToken,
|
||||
collection,
|
||||
teamId: userTeamId,
|
||||
chatTime
|
||||
} = req.body;
|
||||
|
||||
const { collection, teamId: userTeamId } = await (async () => {
|
||||
if (!appId || !chatId || !chatItemDataId) {
|
||||
return authDatasetCollection({
|
||||
} = await (async () => {
|
||||
if (!('chatItemDataId' in parseBody)) {
|
||||
const result = await authDatasetCollection({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
collectionId: req.body.collectionId,
|
||||
collectionId,
|
||||
per: ReadPermissionVal
|
||||
});
|
||||
return {
|
||||
...result,
|
||||
chatTime: undefined
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
const { appId, chatId, chatItemDataId, shareId, outLinkUid, teamId, teamToken, chatTime } =
|
||||
parseBody;
|
||||
/*
|
||||
1. auth chat read permission
|
||||
2. auth collection quote in chat
|
||||
3. auth outlink open show quote
|
||||
@@ -73,7 +67,8 @@ async function handler(req: ApiRequestProps<ExportCollectionBody, {}>, res: Next
|
||||
|
||||
return {
|
||||
...authRes,
|
||||
collection
|
||||
collection,
|
||||
chatTime
|
||||
};
|
||||
})();
|
||||
|
||||
@@ -107,11 +102,17 @@ async function handler(req: ApiRequestProps<ExportCollectionBody, {}>, res: Next
|
||||
readStream: cursor
|
||||
});
|
||||
|
||||
write(`\uFEFFindex,content`);
|
||||
write(`\uFEFFq,a`);
|
||||
|
||||
cursor.on('data', (doc) => {
|
||||
const sanitizedQ = sanitizeCsvField(doc.q || '');
|
||||
const sanitizedA = sanitizeCsvField(doc.a || '');
|
||||
const sanitizedQ = replaceS3KeyToPreviewUrl(
|
||||
sanitizeCsvField(doc.q || ''),
|
||||
addDays(new Date(), 90)
|
||||
);
|
||||
const sanitizedA = replaceS3KeyToPreviewUrl(
|
||||
sanitizeCsvField(doc.a || ''),
|
||||
addDays(new Date(), 90)
|
||||
);
|
||||
|
||||
write(`\n${sanitizedQ},${sanitizedA}`);
|
||||
});
|
||||
|
||||
@@ -83,7 +83,6 @@ import type {
|
||||
DatasetCreateWithFilesBody,
|
||||
DatasetCreateWithFilesResponse
|
||||
} from '@/pages/api/core/dataset/createWithFiles';
|
||||
import type { PresignDatasetFileGetUrlParams } from '@fastgpt/global/core/dataset/v2/api';
|
||||
|
||||
/* ======================== dataset ======================= */
|
||||
export const getDatasets = (data: GetDatasetListBody) =>
|
||||
|
||||
Reference in New Issue
Block a user