diff --git a/docSite/content/zh-cn/docs/development/openapi/dataset.md b/docSite/content/zh-cn/docs/development/openapi/dataset.md index 27a7f3f5a..0f16a6011 100644 --- a/docSite/content/zh-cn/docs/development/openapi/dataset.md +++ b/docSite/content/zh-cn/docs/development/openapi/dataset.md @@ -312,6 +312,8 @@ curl --location --request DELETE 'http://localhost:3000/api/core/dataset/delete? | chunkSize | 预估块大小 | | | chunkSplitter | 自定义最高优先分割符号 | | | qaPrompt | qa拆分提示词 | | +| tags | 集合标签(字符串数组) | | +| createTime | 文件创建时间(Date / String) | | **出参** @@ -604,9 +606,11 @@ curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/co --data-raw '{ "externalFileUrl":"https://image.xxxxx.com/fastgpt-dev/%E6%91%82.pdf", "externalFileId":"1111", - "filename":"自定义文件名", + "createTime": "2024-05-01T00:00:00.000Z", + "filename":"自定义文件名.pdf", "datasetId":"6642d105a5e9d2b00255b27b", "parentId": null, + "tags": ["tag1","tag2"], "trainingType": "chunk", "chunkSize":512, @@ -625,7 +629,8 @@ curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/co | --- | --- | --- | | externalFileUrl | 文件访问链接(可以是临时链接) | ✅ | | externalFileId | 外部文件ID | | -| filename | 自定义文件名 | | +| filename | 自定义文件名,需要带后缀 | | +| createTime | 文件创建时间(Date ISO 字符串都 ok) | | {{< /markdownify >}} @@ -710,7 +715,21 @@ curl --location --request POST 'http://localhost:3000/api/core/dataset/collectio "updateTime": "2099-01-01T00:00:00.000Z", "dataAmount": 3, "trainingAmount": 0, - "canWrite": true + "externalFileId": "1111", + "tags": [ + "11", + "测试的" + ], + "forbid": false, + "trainingType": "chunk", + "permission": { + "value": 4294967295, + "isOwner": true, + "hasManagePer": true, + "hasWritePer": true, + "hasReadPer": true + } + }, { "_id": "65abd0ad9d1448617cba6031", @@ -722,7 +741,19 @@ curl --location --request POST 'http://localhost:3000/api/core/dataset/collectio "updateTime": "2024-01-20T13:54:53.031Z", "dataAmount": 3, "trainingAmount": 0, - "canWrite": true + 
"externalFileId": "222", + "tags": [ + "测试的" + ], + "forbid": false, + "trainingType": "chunk", + "permission": { + "value": 4294967295, + "isOwner": true, + "hasManagePer": true, + "hasWritePer": true, + "hasReadPer": true + } } ], "total": 93 @@ -813,14 +844,36 @@ curl --location --request GET 'http://localhost:3000/api/core/dataset/collection {{< tab tabName="请求示例" >}} {{< markdownify >}} +**通过集合 ID 修改集合信息** + ```bash curl --location --request PUT 'http://localhost:3000/api/core/dataset/collection/update' \ --header 'Authorization: Bearer {{authorization}}' \ --header 'Content-Type: application/json' \ --data-raw '{ "id":"65abcfab9d1448617cba5f0d", - "parentId":null, - "name":"测2222试" + "parentId": null, + "name": "测2222试", + "tags": ["tag1", "tag2"], + "forbid": false, + "createTime": "2024-01-01T00:00:00.000Z" +}' +``` + +**通过外部文件 ID 修改集合信息**, 只需要把 id 换成 datasetId 和 externalFileId。 + +```bash +curl --location --request PUT 'http://localhost:3000/api/core/dataset/collection/update' \ +--header 'Authorization: Bearer {{authorization}}' \ +--header 'Content-Type: application/json' \ +--data-raw '{ + "datasetId":"6593e137231a2be9c5603ba7", + "externalFileId":"1111", + "parentId": null, + "name": "测2222试", + "tags": ["tag1", "tag2"], + "forbid": false, + "createTime": "2024-01-01T00:00:00.000Z" }' ``` @@ -834,6 +887,9 @@ curl --location --request PUT 'http://localhost:3000/api/core/dataset/collection - id: 集合的ID - parentId: 修改父级ID(可选) - name: 修改集合名称(可选) +- tags: 修改集合标签(可选) +- forbid: 修改集合禁用状态(可选) +- createTime: 修改集合创建时间(可选) {{% /alert %}} {{< /markdownify >}} diff --git a/docSite/content/zh-cn/docs/development/upgrading/4811.md b/docSite/content/zh-cn/docs/development/upgrading/4811.md index a1213f449..5c343ae56 100644 --- a/docSite/content/zh-cn/docs/development/upgrading/4811.md +++ b/docSite/content/zh-cn/docs/development/upgrading/4811.md @@ -95,6 +95,7 @@ weight: 813 9. 优化 - 工作流 handler 性能优化。 10. 优化 - 工作流快捷键,避免调试测试时也会触发。 11. 优化 - 流输出,切换 tab 时仍可以继续输出。 -12. 
修复 - 知识库选择权限问题。 -13. 修复 - 空 chatId 发起对话,首轮携带用户选择时会异常。 -14. 修复 - createDataset 接口,intro 为赋值。 +12. 优化 - 完善外部文件知识库相关 API +13. 修复 - 知识库选择权限问题。 +14. 修复 - 空 chatId 发起对话,首轮携带用户选择时会异常。 +15. 修复 - createDataset 接口,intro 未赋值。 diff --git a/packages/service/core/dataset/collection/controller.ts b/packages/service/core/dataset/collection/controller.ts index 132101e1c..a19a279a1 100644 --- a/packages/service/core/dataset/collection/controller.ts +++ b/packages/service/core/dataset/collection/controller.ts @@ -45,7 +45,10 @@ export async function createOneCollection({ [key: string]: any; session?: ClientSession; }) { + // Create collection tags const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session }); + + // Create collection const [collection] = await MongoDatasetCollection.create( [ { diff --git a/packages/service/core/dataset/collection/schema.ts b/packages/service/core/dataset/collection/schema.ts index 9b6246c2b..00df3dc05 100644 --- a/packages/service/core/dataset/collection/schema.ts +++ b/packages/service/core/dataset/collection/schema.ts @@ -111,6 +111,17 @@ try { DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, tags: 1 }); // create time filter DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, createTime: 1 }); + + // Get collection by external file id + DatasetCollectionSchema.index( + { datasetId: 1, externalFileId: 1 }, + { + unique: true, + partialFilterExpression: { + externalFileId: { $exists: true } + } + } + ); } catch (error) { console.log(error); } diff --git a/packages/service/core/dataset/collection/utils.ts b/packages/service/core/dataset/collection/utils.ts index 1718a800a..a1275924e 100644 --- a/packages/service/core/dataset/collection/utils.ts +++ b/packages/service/core/dataset/collection/utils.ts @@ -1,6 +1,5 @@ import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type.d'; import { MongoDatasetCollection } from './schema'; -import type { ParentTreePathItemType } from 
'@fastgpt/global/common/parentFolder/type.d'; import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter'; import { MongoDatasetTraining } from '../training/schema'; import { urlsFetch } from '../../../common/string/cheerio'; @@ -12,6 +11,7 @@ import { hashStr } from '@fastgpt/global/common/string/tools'; import { ClientSession } from '../../../common/mongo'; import { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api'; import { MongoDatasetCollectionTags } from '../tag/schema'; +import { readFromSecondary } from '../../../common/mongo/utils'; /** * get all collection by top collectionId @@ -160,7 +160,7 @@ export const reloadCollectionChunks = async ({ const { chunks } = splitText2Chunks({ text: newRawText, chunkLen: col.chunkSize || 512, - customReg: col.chunkSplitter ? [col.chunkSplitter] : [], + customReg: col.chunkSplitter ? [col.chunkSplitter] : [] }); // insert to training queue @@ -204,7 +204,7 @@ export const reloadCollectionChunks = async ({ }; export const createOrGetCollectionTags = async ({ - tags = [], + tags, datasetId, teamId, session @@ -213,13 +213,20 @@ export const createOrGetCollectionTags = async ({ datasetId: string; teamId: string; session?: ClientSession; -}): Promise => { - if (!tags.length) return []; - const existingTags = await MongoDatasetCollectionTags.find({ - teamId, - datasetId, - $expr: { $in: ['$tag', tags] } - }); +}) => { + if (!tags) return undefined; + + if (tags.length === 0) return []; + + const existingTags = await MongoDatasetCollectionTags.find( + { + teamId, + datasetId, + tag: { $in: tags } + }, + undefined, + { session } + ).lean(); const existingTagContents = existingTags.map((tag) => tag.tag); const newTagContents = tags.filter((tag) => !existingTagContents.includes(tag)); @@ -235,3 +242,29 @@ export const createOrGetCollectionTags = async ({ return [...existingTags.map((tag) => tag._id), ...newTags.map((tag) => tag._id)]; }; + +export const collectionTagsToTagLabel = async ({ + 
datasetId, + tags +}: { + datasetId: string; + tags?: string[]; +}) => { + if (!tags) return undefined; + if (tags.length === 0) return; + + // Get all the tags + const collectionTags = await MongoDatasetCollectionTags.find({ datasetId }, undefined, { + ...readFromSecondary + }).lean(); + const tagsMap = new Map(); + collectionTags.forEach((tag) => { + tagsMap.set(String(tag._id), tag.tag); + }); + + return tags + .map((tag) => { + return tagsMap.get(tag) || ''; + }) + .filter(Boolean); +}; diff --git a/projects/app/src/global/core/dataset/type.d.ts b/projects/app/src/global/core/dataset/type.d.ts index a487b7f1c..945270b01 100644 --- a/projects/app/src/global/core/dataset/type.d.ts +++ b/projects/app/src/global/core/dataset/type.d.ts @@ -21,6 +21,8 @@ export type DatasetCollectionsListItemType = { trainingType?: DatasetCollectionSchemaType['trainingType']; tags?: string[]; + externalFileId?: string; + fileId?: string; rawLink?: string; permission: DatasetPermission; diff --git a/projects/app/src/pages/api/core/dataset/collection/detail.ts b/projects/app/src/pages/api/core/dataset/collection/detail.ts index fdae3cd4f..27f364102 100644 --- a/projects/app/src/pages/api/core/dataset/collection/detail.ts +++ b/projects/app/src/pages/api/core/dataset/collection/detail.ts @@ -10,6 +10,7 @@ import { NextAPI } from '@/service/middleware/entry'; import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant'; import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type'; import { CommonErrEnum } from '@fastgpt/global/common/error/code/common'; +import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils'; async function handler(req: NextApiRequest): Promise { const { id } = req.query as { id: string }; @@ -35,6 +36,10 @@ async function handler(req: NextApiRequest): Promise return { ...collection, ...getCollectionSourceData(collection), + tags: await collectionTagsToTagLabel({ + datasetId: collection.datasetId._id, 
+ tags: collection.tags + }), permission, file }; diff --git a/projects/app/src/pages/api/core/dataset/collection/list.ts b/projects/app/src/pages/api/core/dataset/collection/list.ts index 1d64a3e17..7c7410275 100644 --- a/projects/app/src/pages/api/core/dataset/collection/list.ts +++ b/projects/app/src/pages/api/core/dataset/collection/list.ts @@ -11,6 +11,8 @@ import { startTrainingQueue } from '@/service/core/dataset/training/utils'; import { NextAPI } from '@/service/middleware/entry'; import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant'; import { PagingData } from '@/types'; +import { readFromSecondary } from '@fastgpt/service/common/mongo/utils'; +import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils'; async function handler(req: NextApiRequest): Promise> { let { @@ -60,12 +62,15 @@ async function handler(req: NextApiRequest): Promise ({ ...item, + tags: await collectionTagsToTagLabel({ + datasetId, + tags: item.tags + }), dataAmount: 0, trainingAmount: 0, permission @@ -153,12 +162,18 @@ async function handler(req: NextApiRequest): Promise ({ ...item, + tags: await collectionTagsToTagLabel({ + datasetId, + tags: item.tags + }), permission })) ); diff --git a/projects/app/src/pages/api/core/dataset/collection/update.ts b/projects/app/src/pages/api/core/dataset/collection/update.ts index d8327f7cd..4687ef440 100644 --- a/projects/app/src/pages/api/core/dataset/collection/update.ts +++ b/projects/app/src/pages/api/core/dataset/collection/update.ts @@ -1,5 +1,8 @@ import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; -import { getCollectionUpdateTime } from '@fastgpt/service/core/dataset/collection/utils'; +import { + createOrGetCollectionTags, + getCollectionUpdateTime +} from '@fastgpt/service/core/dataset/collection/utils'; import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth'; import { NextAPI } from 
'@/service/middleware/entry'; import { WritePermissionVal } from '@fastgpt/global/support/permission/constant'; @@ -11,11 +14,16 @@ import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type'; import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun'; export type UpdateDatasetCollectionParams = { - id: string; + id?: string; parentId?: string; name?: string; - tags?: string[]; + tags?: string[]; // Not tag id, is tag label forbid?: boolean; + createTime?: Date; + + // External file id + datasetId?: string; + externalFileId?: string; }; // Set folder collection children forbid status @@ -65,14 +73,22 @@ const updateFolderChildrenForbid = async ({ }; async function handler(req: ApiRequestProps) { - const { id, parentId, name, tags, forbid } = req.body; + let { datasetId, externalFileId, id, parentId, name, tags, forbid, createTime } = req.body; + + if (datasetId && externalFileId) { + const collection = await MongoDatasetCollection.findOne({ datasetId, externalFileId }, '_id'); + if (!collection) { + return Promise.reject(CommonErrEnum.fileNotFound); + } + id = collection._id; + } if (!id) { return Promise.reject(CommonErrEnum.missingParams); } // 凭证校验 - const { collection } = await authDatasetCollection({ + const { collection, teamId } = await authDatasetCollection({ req, authToken: true, authApiKey: true, @@ -81,6 +97,13 @@ async function handler(req: ApiRequestProps) { }); await mongoSessionRun(async (session) => { + const collectionTags = await createOrGetCollectionTags({ + tags, + teamId, + datasetId: collection.datasetId._id, + session + }); + await MongoDatasetCollection.updateOne( { _id: id @@ -89,8 +112,9 @@ async function handler(req: ApiRequestProps) { $set: { ...(parentId !== undefined && { parentId: parentId || null }), ...(name && { name, updateTime: getCollectionUpdateTime({ name }) }), - ...(tags && { tags }), - ...(forbid !== undefined && { forbid }) + ...(collectionTags !== undefined && { tags: collectionTags }), + 
...(forbid !== undefined && { forbid }), + ...(createTime !== undefined && { createTime }) } }, { diff --git a/projects/app/src/pages/dataset/detail/components/CollectionCard/TagsPopOver.tsx b/projects/app/src/pages/dataset/detail/components/CollectionCard/TagsPopOver.tsx index 1d033c4d3..63461ea62 100644 --- a/projects/app/src/pages/dataset/detail/components/CollectionCard/TagsPopOver.tsx +++ b/projects/app/src/pages/dataset/detail/components/CollectionCard/TagsPopOver.tsx @@ -35,8 +35,8 @@ const TagsPopOver = ({ const tagList = useMemo( () => (collectionTags - ?.map((tagId) => { - const tagObject = allDatasetTags.find((tag) => tag._id === tagId); + ?.map((item) => { + const tagObject = allDatasetTags.find((tag) => tag.tag === item); return tagObject ? { _id: tagObject._id, tag: tagObject.tag } : null; }) .filter((tag) => tag !== null) as { @@ -153,9 +153,9 @@ const TagsPopOver = ({ setIsUpdateLoading(true); await putDatasetCollectionById({ id: currentCollection._id, - tags: checkedTags.map((tag) => tag._id) + tags: checkedTags.map((tag) => tag.tag) }); - setCollectionTags(checkedTags.map((tag) => tag._id)); + setCollectionTags(checkedTags.map((tag) => tag.tag)); setIsUpdateLoading(false); }} display={showTagManage || overflowTags.length > 0 ? 
'block' : 'none'} diff --git a/projects/app/src/pages/dataset/detail/components/DataCard.tsx b/projects/app/src/pages/dataset/detail/components/DataCard.tsx index 1bb5bec53..5d61ccb6e 100644 --- a/projects/app/src/pages/dataset/detail/components/DataCard.tsx +++ b/projects/app/src/pages/dataset/detail/components/DataCard.tsx @@ -1,10 +1,9 @@ -import React, { useState, useRef, useMemo } from 'react'; +import React, { useState, useMemo } from 'react'; import { Box, Card, IconButton, Flex, Button, useTheme } from '@chakra-ui/react'; import { getDatasetDataList, delOneDatasetDataById, - getDatasetCollectionById, - putDatasetDataById + getDatasetCollectionById } from '@/web/core/dataset/api'; import { useQuery } from '@tanstack/react-query'; import { useToast } from '@fastgpt/web/hooks/useToast'; @@ -21,7 +20,6 @@ import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection import EmptyTip from '@fastgpt/web/components/common/EmptyTip'; import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext'; import { useContextSelector } from 'use-context-selector'; -import { useRequest2 } from '@fastgpt/web/hooks/useRequest'; import MyTag from '@fastgpt/web/components/common/Tag/index'; import MyBox from '@fastgpt/web/components/common/MyBox'; import { useSystem } from '@fastgpt/web/hooks/useSystem'; @@ -96,24 +94,27 @@ const DataCard = () => { {/* Header */} - - - - {collection?._id && ( - - )} - - {feConfigs?.isPlus && !!collection?.tags?.length && ( - + + + {collection?._id && ( + )} - + {feConfigs?.isPlus && !!collection?.tags?.length && ( + + )} + {canWrite && (