Add externalfile api params (#2745)

* feat: external dataset api

* perf: doc
This commit is contained in:
Archer
2024-09-19 13:28:55 +08:00
committed by GitHub
parent 258de4471e
commit 265434799f
12 changed files with 207 additions and 55 deletions

View File

@@ -312,6 +312,8 @@ curl --location --request DELETE 'http://localhost:3000/api/core/dataset/delete?
| chunkSize | 预估块大小 | | | chunkSize | 预估块大小 | |
| chunkSplitter | 自定义最高优先分割符号 | | | chunkSplitter | 自定义最高优先分割符号 | |
| qaPrompt | qa拆分提示词 | | | qaPrompt | qa拆分提示词 | |
| tags | 集合标签(字符串数组) | |
| createTime | 文件创建时间Date / String | |
**出参** **出参**
@@ -604,9 +606,11 @@ curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/co
--data-raw '{ --data-raw '{
"externalFileUrl":"https://image.xxxxx.com/fastgpt-dev/%E6%91%82.pdf", "externalFileUrl":"https://image.xxxxx.com/fastgpt-dev/%E6%91%82.pdf",
"externalFileId":"1111", "externalFileId":"1111",
"filename":"自定义文件名", "createTime": "2024-05-01T00:00:00.000Z",
"filename":"自定义文件名.pdf",
"datasetId":"6642d105a5e9d2b00255b27b", "datasetId":"6642d105a5e9d2b00255b27b",
"parentId": null, "parentId": null,
"tags": ["tag1","tag2"],
"trainingType": "chunk", "trainingType": "chunk",
"chunkSize":512, "chunkSize":512,
@@ -625,7 +629,8 @@ curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/co
| --- | --- | --- | | --- | --- | --- |
| externalFileUrl | 文件访问链接(可以是临时链接) | ✅ | | externalFileUrl | 文件访问链接(可以是临时链接) | ✅ |
| externalFileId | 外部文件ID | | | externalFileId | 外部文件ID | |
| filename | 自定义文件名 | | | filename | 自定义文件名,需要带后缀 | |
| createTime | 文件创建时间Date ISO 字符串都 ok | |
{{< /markdownify >}} {{< /markdownify >}}
@@ -710,7 +715,21 @@ curl --location --request POST 'http://localhost:3000/api/core/dataset/collectio
"updateTime": "2099-01-01T00:00:00.000Z", "updateTime": "2099-01-01T00:00:00.000Z",
"dataAmount": 3, "dataAmount": 3,
"trainingAmount": 0, "trainingAmount": 0,
"canWrite": true "externalFileId": "1111",
"tags": [
"11",
"测试的"
],
"forbid": false,
"trainingType": "chunk",
"permission": {
"value": 4294967295,
"isOwner": true,
"hasManagePer": true,
"hasWritePer": true,
"hasReadPer": true
}
}, },
{ {
"_id": "65abd0ad9d1448617cba6031", "_id": "65abd0ad9d1448617cba6031",
@@ -722,7 +741,19 @@ curl --location --request POST 'http://localhost:3000/api/core/dataset/collectio
"updateTime": "2024-01-20T13:54:53.031Z", "updateTime": "2024-01-20T13:54:53.031Z",
"dataAmount": 3, "dataAmount": 3,
"trainingAmount": 0, "trainingAmount": 0,
"canWrite": true "externalFileId": "222",
"tags": [
"测试的"
],
"forbid": false,
"trainingType": "chunk",
"permission": {
"value": 4294967295,
"isOwner": true,
"hasManagePer": true,
"hasWritePer": true,
"hasReadPer": true
}
} }
], ],
"total": 93 "total": 93
@@ -813,14 +844,36 @@ curl --location --request GET 'http://localhost:3000/api/core/dataset/collection
{{< tab tabName="请求示例" >}} {{< tab tabName="请求示例" >}}
{{< markdownify >}} {{< markdownify >}}
**通过集合 ID 修改集合信息**
```bash ```bash
curl --location --request PUT 'http://localhost:3000/api/core/dataset/collection/update' \ curl --location --request PUT 'http://localhost:3000/api/core/dataset/collection/update' \
--header 'Authorization: Bearer {{authorization}}' \ --header 'Authorization: Bearer {{authorization}}' \
--header 'Content-Type: application/json' \ --header 'Content-Type: application/json' \
--data-raw '{ --data-raw '{
"id":"65abcfab9d1448617cba5f0d", "id":"65abcfab9d1448617cba5f0d",
"parentId":null, "parentId": null,
"name":"测2222试" "name": "测2222试",
"tags": ["tag1", "tag2"],
"forbid": false,
"createTime": "2024-01-01T00:00:00.000Z"
}'
```
**通过外部文件 ID 修改集合信息** 只需要把 id 换成 datasetId 和 externalFileId。
```bash
curl --location --request PUT 'http://localhost:3000/api/core/dataset/collection/update' \
--header 'Authorization: Bearer {{authorization}}' \
--header 'Content-Type: application/json' \
--data-raw '{
"datasetId":"6593e137231a2be9c5603ba7",
"externalFileId":"1111",
"parentId": null,
"name": "测2222试",
"tags": ["tag1", "tag2"],
"forbid": false,
"createTime": "2024-01-01T00:00:00.000Z"
}' }'
``` ```
@@ -834,6 +887,9 @@ curl --location --request PUT 'http://localhost:3000/api/core/dataset/collection
- id: 集合的ID - id: 集合的ID
- parentId: 修改父级ID可选 - parentId: 修改父级ID可选
- name: 修改集合名称(可选) - name: 修改集合名称(可选)
- tags: 修改集合标签(可选)
- forbid: 修改集合禁用状态(可选)
- createTime: 修改集合创建时间(可选)
{{% /alert %}} {{% /alert %}}
{{< /markdownify >}} {{< /markdownify >}}

View File

@@ -95,6 +95,7 @@ weight: 813
9. 优化 - 工作流 handler 性能优化。 9. 优化 - 工作流 handler 性能优化。
10. 优化 - 工作流快捷键,避免调试测试时也会触发。 10. 优化 - 工作流快捷键,避免调试测试时也会触发。
11. 优化 - 流输出,切换 tab 时仍可以继续输出。 11. 优化 - 流输出,切换 tab 时仍可以继续输出。
12. 修复 - 知识库选择权限问题。 12. 优化 - 完善外部文件知识库相关 API
13. 修复 - 空 chatId 发起对话,首轮携带用户选择时会异常 13. 修复 - 知识库选择权限问题
14. 修复 - createDataset 接口intro 为赋值 14. 修复 - 空 chatId 发起对话,首轮携带用户选择时会异常
15. 修复 - createDataset 接口,intro 未赋值。

View File

@@ -45,7 +45,10 @@ export async function createOneCollection({
[key: string]: any; [key: string]: any;
session?: ClientSession; session?: ClientSession;
}) { }) {
// Create collection tags
const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session }); const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });
// Create collection
const [collection] = await MongoDatasetCollection.create( const [collection] = await MongoDatasetCollection.create(
[ [
{ {

View File

@@ -111,6 +111,17 @@ try {
DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, tags: 1 }); DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, tags: 1 });
// create time filter // create time filter
DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, createTime: 1 }); DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, createTime: 1 });
// Get collection by external file id
DatasetCollectionSchema.index(
{ datasetId: 1, externalFileId: 1 },
{
unique: true,
partialFilterExpression: {
externalFileId: { $exists: true }
}
}
);
} catch (error) { } catch (error) {
console.log(error); console.log(error);
} }

View File

@@ -1,6 +1,5 @@
import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type.d'; import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type.d';
import { MongoDatasetCollection } from './schema'; import { MongoDatasetCollection } from './schema';
import type { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type.d';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter'; import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { MongoDatasetTraining } from '../training/schema'; import { MongoDatasetTraining } from '../training/schema';
import { urlsFetch } from '../../../common/string/cheerio'; import { urlsFetch } from '../../../common/string/cheerio';
@@ -12,6 +11,7 @@ import { hashStr } from '@fastgpt/global/common/string/tools';
import { ClientSession } from '../../../common/mongo'; import { ClientSession } from '../../../common/mongo';
import { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api'; import { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api';
import { MongoDatasetCollectionTags } from '../tag/schema'; import { MongoDatasetCollectionTags } from '../tag/schema';
import { readFromSecondary } from '../../../common/mongo/utils';
/** /**
* get all collection by top collectionId * get all collection by top collectionId
@@ -160,7 +160,7 @@ export const reloadCollectionChunks = async ({
const { chunks } = splitText2Chunks({ const { chunks } = splitText2Chunks({
text: newRawText, text: newRawText,
chunkLen: col.chunkSize || 512, chunkLen: col.chunkSize || 512,
customReg: col.chunkSplitter ? [col.chunkSplitter] : [], customReg: col.chunkSplitter ? [col.chunkSplitter] : []
}); });
// insert to training queue // insert to training queue
@@ -204,7 +204,7 @@ export const reloadCollectionChunks = async ({
}; };
export const createOrGetCollectionTags = async ({ export const createOrGetCollectionTags = async ({
tags = [], tags,
datasetId, datasetId,
teamId, teamId,
session session
@@ -213,13 +213,20 @@ export const createOrGetCollectionTags = async ({
datasetId: string; datasetId: string;
teamId: string; teamId: string;
session?: ClientSession; session?: ClientSession;
}): Promise<string[]> => { }) => {
if (!tags.length) return []; if (!tags) return undefined;
const existingTags = await MongoDatasetCollectionTags.find({
teamId, if (tags.length === 0) return [];
datasetId,
$expr: { $in: ['$tag', tags] } const existingTags = await MongoDatasetCollectionTags.find(
}); {
teamId,
datasetId,
tag: { $in: tags }
},
undefined,
{ session }
).lean();
const existingTagContents = existingTags.map((tag) => tag.tag); const existingTagContents = existingTags.map((tag) => tag.tag);
const newTagContents = tags.filter((tag) => !existingTagContents.includes(tag)); const newTagContents = tags.filter((tag) => !existingTagContents.includes(tag));
@@ -235,3 +242,29 @@ export const createOrGetCollectionTags = async ({
return [...existingTags.map((tag) => tag._id), ...newTags.map((tag) => tag._id)]; return [...existingTags.map((tag) => tag._id), ...newTags.map((tag) => tag._id)];
}; };
/**
 * Resolve a collection's stored tag ids into their human-readable labels.
 *
 * @param datasetId - dataset the tags belong to; used to scope the lookup.
 * @param tags - tag *ids* stored on the collection (not labels).
 * @returns the matching labels (unknown ids are dropped);
 *          `undefined` when `tags` is missing or empty, mirroring the
 *          "field not set" semantics expected by callers.
 */
export const collectionTagsToTagLabel = async ({
  datasetId,
  tags
}: {
  datasetId: string;
  tags?: string[];
}) => {
  // No tags field on the collection at all — propagate "unset".
  if (!tags) return undefined;
  // Empty list — nothing to resolve.
  if (tags.length === 0) return;

  // Fetch every tag document for this dataset in one query,
  // reading from a secondary replica to offload the primary.
  const allDatasetTags = await MongoDatasetCollectionTags.find({ datasetId }, undefined, {
    ...readFromSecondary
  }).lean();

  // Index id -> label for O(1) lookups.
  const idToLabel = new Map<string, string>();
  for (const tagDoc of allDatasetTags) {
    idToLabel.set(String(tagDoc._id), tagDoc.tag);
  }

  // Translate each id, silently skipping ids with no matching document.
  const labels: string[] = [];
  for (const tagId of tags) {
    const label = idToLabel.get(tagId);
    if (label) {
      labels.push(label);
    }
  }
  return labels;
};

View File

@@ -21,6 +21,8 @@ export type DatasetCollectionsListItemType = {
trainingType?: DatasetCollectionSchemaType['trainingType']; trainingType?: DatasetCollectionSchemaType['trainingType'];
tags?: string[]; tags?: string[];
externalFileId?: string;
fileId?: string; fileId?: string;
rawLink?: string; rawLink?: string;
permission: DatasetPermission; permission: DatasetPermission;

View File

@@ -10,6 +10,7 @@ import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant'; import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type'; import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common'; import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType> { async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType> {
const { id } = req.query as { id: string }; const { id } = req.query as { id: string };
@@ -35,6 +36,10 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
return { return {
...collection, ...collection,
...getCollectionSourceData(collection), ...getCollectionSourceData(collection),
tags: await collectionTagsToTagLabel({
datasetId: collection.datasetId._id,
tags: collection.tags
}),
permission, permission,
file file
}; };

View File

@@ -11,6 +11,8 @@ import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { NextAPI } from '@/service/middleware/entry'; import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant'; import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { PagingData } from '@/types'; import { PagingData } from '@/types';
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
async function handler(req: NextApiRequest): Promise<PagingData<DatasetCollectionsListItemType>> { async function handler(req: NextApiRequest): Promise<PagingData<DatasetCollectionsListItemType>> {
let { let {
@@ -60,12 +62,15 @@ async function handler(req: NextApiRequest): Promise<PagingData<DatasetCollectio
trainingType: 1, trainingType: 1,
fileId: 1, fileId: 1,
rawLink: 1, rawLink: 1,
tags: 1 tags: 1,
externalFileId: 1
}; };
// not count data amount // not count data amount
if (simple) { if (simple) {
const collections = await MongoDatasetCollection.find(match) const collections = await MongoDatasetCollection.find(match, undefined, {
...readFromSecondary
})
.select(selectField) .select(selectField)
.sort({ .sort({
updateTime: -1 updateTime: -1
@@ -78,6 +83,10 @@ async function handler(req: NextApiRequest): Promise<PagingData<DatasetCollectio
data: await Promise.all( data: await Promise.all(
collections.map(async (item) => ({ collections.map(async (item) => ({
...item, ...item,
tags: await collectionTagsToTagLabel({
datasetId,
tags: item.tags
}),
dataAmount: 0, dataAmount: 0,
trainingAmount: 0, trainingAmount: 0,
permission permission
@@ -153,12 +162,18 @@ async function handler(req: NextApiRequest): Promise<PagingData<DatasetCollectio
} }
} }
]), ]),
MongoDatasetCollection.countDocuments(match) MongoDatasetCollection.countDocuments(match, {
...readFromSecondary
})
]); ]);
const data = await Promise.all( const data = await Promise.all(
collections.map(async (item) => ({ collections.map(async (item) => ({
...item, ...item,
tags: await collectionTagsToTagLabel({
datasetId,
tags: item.tags
}),
permission permission
})) }))
); );

View File

@@ -1,5 +1,8 @@
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { getCollectionUpdateTime } from '@fastgpt/service/core/dataset/collection/utils'; import {
createOrGetCollectionTags,
getCollectionUpdateTime
} from '@fastgpt/service/core/dataset/collection/utils';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth'; import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { NextAPI } from '@/service/middleware/entry'; import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant'; import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -11,11 +14,16 @@ import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun'; import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
export type UpdateDatasetCollectionParams = { export type UpdateDatasetCollectionParams = {
id: string; id?: string;
parentId?: string; parentId?: string;
name?: string; name?: string;
tags?: string[]; tags?: string[]; // Not tag id, is tag label
forbid?: boolean; forbid?: boolean;
createTime?: Date;
// External file id
datasetId?: string;
externalFileId?: string;
}; };
// Set folder collection children forbid status // Set folder collection children forbid status
@@ -65,14 +73,22 @@ const updateFolderChildrenForbid = async ({
}; };
async function handler(req: ApiRequestProps<UpdateDatasetCollectionParams>) { async function handler(req: ApiRequestProps<UpdateDatasetCollectionParams>) {
const { id, parentId, name, tags, forbid } = req.body; let { datasetId, externalFileId, id, parentId, name, tags, forbid, createTime } = req.body;
if (datasetId && externalFileId) {
const collection = await MongoDatasetCollection.findOne({ datasetId, externalFileId }, '_id');
if (!collection) {
return Promise.reject(CommonErrEnum.fileNotFound);
}
id = collection._id;
}
if (!id) { if (!id) {
return Promise.reject(CommonErrEnum.missingParams); return Promise.reject(CommonErrEnum.missingParams);
} }
// 凭证校验 // 凭证校验
const { collection } = await authDatasetCollection({ const { collection, teamId } = await authDatasetCollection({
req, req,
authToken: true, authToken: true,
authApiKey: true, authApiKey: true,
@@ -81,6 +97,13 @@ async function handler(req: ApiRequestProps<UpdateDatasetCollectionParams>) {
}); });
await mongoSessionRun(async (session) => { await mongoSessionRun(async (session) => {
const collectionTags = await createOrGetCollectionTags({
tags,
teamId,
datasetId: collection.datasetId._id,
session
});
await MongoDatasetCollection.updateOne( await MongoDatasetCollection.updateOne(
{ {
_id: id _id: id
@@ -89,8 +112,9 @@ async function handler(req: ApiRequestProps<UpdateDatasetCollectionParams>) {
$set: { $set: {
...(parentId !== undefined && { parentId: parentId || null }), ...(parentId !== undefined && { parentId: parentId || null }),
...(name && { name, updateTime: getCollectionUpdateTime({ name }) }), ...(name && { name, updateTime: getCollectionUpdateTime({ name }) }),
...(tags && { tags }), ...(collectionTags !== undefined && { tags: collectionTags }),
...(forbid !== undefined && { forbid }) ...(forbid !== undefined && { forbid }),
...(createTime !== undefined && { createTime })
} }
}, },
{ {

View File

@@ -35,8 +35,8 @@ const TagsPopOver = ({
const tagList = useMemo( const tagList = useMemo(
() => () =>
(collectionTags (collectionTags
?.map((tagId) => { ?.map((item) => {
const tagObject = allDatasetTags.find((tag) => tag._id === tagId); const tagObject = allDatasetTags.find((tag) => tag.tag === item);
return tagObject ? { _id: tagObject._id, tag: tagObject.tag } : null; return tagObject ? { _id: tagObject._id, tag: tagObject.tag } : null;
}) })
.filter((tag) => tag !== null) as { .filter((tag) => tag !== null) as {
@@ -153,9 +153,9 @@ const TagsPopOver = ({
setIsUpdateLoading(true); setIsUpdateLoading(true);
await putDatasetCollectionById({ await putDatasetCollectionById({
id: currentCollection._id, id: currentCollection._id,
tags: checkedTags.map((tag) => tag._id) tags: checkedTags.map((tag) => tag.tag)
}); });
setCollectionTags(checkedTags.map((tag) => tag._id)); setCollectionTags(checkedTags.map((tag) => tag.tag));
setIsUpdateLoading(false); setIsUpdateLoading(false);
}} }}
display={showTagManage || overflowTags.length > 0 ? 'block' : 'none'} display={showTagManage || overflowTags.length > 0 ? 'block' : 'none'}

View File

@@ -1,10 +1,9 @@
import React, { useState, useRef, useMemo } from 'react'; import React, { useState, useMemo } from 'react';
import { Box, Card, IconButton, Flex, Button, useTheme } from '@chakra-ui/react'; import { Box, Card, IconButton, Flex, Button, useTheme } from '@chakra-ui/react';
import { import {
getDatasetDataList, getDatasetDataList,
delOneDatasetDataById, delOneDatasetDataById,
getDatasetCollectionById, getDatasetCollectionById
putDatasetDataById
} from '@/web/core/dataset/api'; } from '@/web/core/dataset/api';
import { useQuery } from '@tanstack/react-query'; import { useQuery } from '@tanstack/react-query';
import { useToast } from '@fastgpt/web/hooks/useToast'; import { useToast } from '@fastgpt/web/hooks/useToast';
@@ -21,7 +20,6 @@ import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection
import EmptyTip from '@fastgpt/web/components/common/EmptyTip'; import EmptyTip from '@fastgpt/web/components/common/EmptyTip';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext'; import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { useContextSelector } from 'use-context-selector'; import { useContextSelector } from 'use-context-selector';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import MyTag from '@fastgpt/web/components/common/Tag/index'; import MyTag from '@fastgpt/web/components/common/Tag/index';
import MyBox from '@fastgpt/web/components/common/MyBox'; import MyBox from '@fastgpt/web/components/common/MyBox';
import { useSystem } from '@fastgpt/web/hooks/useSystem'; import { useSystem } from '@fastgpt/web/hooks/useSystem';
@@ -96,24 +94,27 @@ const DataCard = () => {
<Flex flexDirection={'column'} h={'100%'}> <Flex flexDirection={'column'} h={'100%'}>
{/* Header */} {/* Header */}
<Flex alignItems={'center'} px={6}> <Flex alignItems={'center'} px={6}>
<Flex className="textEllipsis" flex={'1 0 0'} mr={[3, 5]} alignItems={'center'}> <Box flex={'1 0 0'} mr={[3, 5]} alignItems={'center'}>
<Box> <Box
<Box alignItems={'center'} gap={2} display={isPc ? 'flex' : ''}> className="textEllipsis"
{collection?._id && ( alignItems={'center'}
<RawSourceBox gap={2}
collectionId={collection._id} display={isPc ? 'flex' : ''}
{...getCollectionSourceData(collection)} >
fontSize={['sm', 'md']} {collection?._id && (
color={'black'} <RawSourceBox
textDecoration={'none'} collectionId={collection._id}
/> {...getCollectionSourceData(collection)}
)} fontSize={['sm', 'md']}
</Box> color={'black'}
{feConfigs?.isPlus && !!collection?.tags?.length && ( textDecoration={'none'}
<TagsPopOver currentCollection={collection} /> />
)} )}
</Box> </Box>
</Flex> {feConfigs?.isPlus && !!collection?.tags?.length && (
<TagsPopOver currentCollection={collection} />
)}
</Box>
{canWrite && ( {canWrite && (
<Box> <Box>
<Button <Button

View File

@@ -243,8 +243,9 @@ const Info = ({ datasetId }: { datasetId: string }) => {
const vectorModel = vectorModelList.find((item) => item.model === e); const vectorModel = vectorModelList.find((item) => item.model === e);
if (!vectorModel) return; if (!vectorModel) return;
return onOpenConfirmRebuild(() => { return onOpenConfirmRebuild(() => {
setValue('vectorModel', vectorModel); return onRebuilding(vectorModel).then(() => {
return onRebuilding(vectorModel); setValue('vectorModel', vectorModel);
});
})(); })();
}} }}
/> />