mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 13:03:50 +00:00
Add externalfile api params (#2745)
* feat: external dataset api * perf: doc
This commit is contained in:
@@ -312,6 +312,8 @@ curl --location --request DELETE 'http://localhost:3000/api/core/dataset/delete?
|
||||
| chunkSize | 预估块大小 | |
|
||||
| chunkSplitter | 自定义最高优先分割符号 | |
|
||||
| qaPrompt | qa拆分提示词 | |
|
||||
| tags | 集合标签(字符串数组) | |
|
||||
| createTime | 文件创建时间(Date / String) | |
|
||||
|
||||
**出参**
|
||||
|
||||
@@ -604,9 +606,11 @@ curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/co
|
||||
--data-raw '{
|
||||
"externalFileUrl":"https://image.xxxxx.com/fastgpt-dev/%E6%91%82.pdf",
|
||||
"externalFileId":"1111",
|
||||
"filename":"自定义文件名",
|
||||
"createTime": "2024-05-01T00:00:00.000Z",
|
||||
"filename":"自定义文件名.pdf",
|
||||
"datasetId":"6642d105a5e9d2b00255b27b",
|
||||
"parentId": null,
|
||||
"tags": ["tag1","tag2"],
|
||||
|
||||
"trainingType": "chunk",
|
||||
"chunkSize":512,
|
||||
@@ -625,7 +629,8 @@ curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/co
|
||||
| --- | --- | --- |
|
||||
| externalFileUrl | 文件访问链接(可以是临时链接) | ✅ |
|
||||
| externalFileId | 外部文件ID | |
|
||||
| filename | 自定义文件名 | |
|
||||
| filename | 自定义文件名,需要带后缀 | |
|
||||
| createTime | 文件创建时间(Date 对象或 ISO 格式字符串均可) | |
|
||||
|
||||
|
||||
{{< /markdownify >}}
|
||||
@@ -710,7 +715,21 @@ curl --location --request POST 'http://localhost:3000/api/core/dataset/collectio
|
||||
"updateTime": "2099-01-01T00:00:00.000Z",
|
||||
"dataAmount": 3,
|
||||
"trainingAmount": 0,
|
||||
"canWrite": true
|
||||
"externalFileId": "1111",
|
||||
"tags": [
|
||||
"11",
|
||||
"测试的"
|
||||
],
|
||||
"forbid": false,
|
||||
"trainingType": "chunk",
|
||||
"permission": {
|
||||
"value": 4294967295,
|
||||
"isOwner": true,
|
||||
"hasManagePer": true,
|
||||
"hasWritePer": true,
|
||||
"hasReadPer": true
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"_id": "65abd0ad9d1448617cba6031",
|
||||
@@ -722,7 +741,19 @@ curl --location --request POST 'http://localhost:3000/api/core/dataset/collectio
|
||||
"updateTime": "2024-01-20T13:54:53.031Z",
|
||||
"dataAmount": 3,
|
||||
"trainingAmount": 0,
|
||||
"canWrite": true
|
||||
"externalFileId": "222",
|
||||
"tags": [
|
||||
"测试的"
|
||||
],
|
||||
"forbid": false,
|
||||
"trainingType": "chunk",
|
||||
"permission": {
|
||||
"value": 4294967295,
|
||||
"isOwner": true,
|
||||
"hasManagePer": true,
|
||||
"hasWritePer": true,
|
||||
"hasReadPer": true
|
||||
}
|
||||
}
|
||||
],
|
||||
"total": 93
|
||||
@@ -813,14 +844,36 @@ curl --location --request GET 'http://localhost:3000/api/core/dataset/collection
|
||||
{{< tab tabName="请求示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
**通过集合 ID 修改集合信息**
|
||||
|
||||
```bash
|
||||
curl --location --request PUT 'http://localhost:3000/api/core/dataset/collection/update' \
|
||||
--header 'Authorization: Bearer {{authorization}}' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"id":"65abcfab9d1448617cba5f0d",
|
||||
"parentId":null,
|
||||
"name":"测2222试"
|
||||
"parentId": null,
|
||||
"name": "测2222试",
|
||||
"tags": ["tag1", "tag2"],
|
||||
"forbid": false,
|
||||
"createTime": "2024-01-01T00:00:00.000Z"
|
||||
}'
|
||||
```
|
||||
|
||||
**通过外部文件 ID 修改集合信息**,只需要把 id 换成 datasetId 和 externalFileId。
|
||||
|
||||
```bash
|
||||
curl --location --request PUT 'http://localhost:3000/api/core/dataset/collection/update' \
|
||||
--header 'Authorization: Bearer {{authorization}}' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"datasetId":"6593e137231a2be9c5603ba7",
|
||||
"externalFileId":"1111",
|
||||
"parentId": null,
|
||||
"name": "测2222试",
|
||||
"tags": ["tag1", "tag2"],
|
||||
"forbid": false,
|
||||
"createTime": "2024-01-01T00:00:00.000Z"
|
||||
}'
|
||||
```
|
||||
|
||||
@@ -834,6 +887,9 @@ curl --location --request PUT 'http://localhost:3000/api/core/dataset/collection
|
||||
- id: 集合的ID(通过外部文件 ID 修改时,改为传入 datasetId 和 externalFileId)
|
||||
- parentId: 修改父级ID(可选)
|
||||
- name: 修改集合名称(可选)
|
||||
- tags: 修改集合标签(可选)
|
||||
- forbid: 修改集合禁用状态(可选)
|
||||
- createTime: 修改集合创建时间(可选)
|
||||
{{% /alert %}}
|
||||
|
||||
{{< /markdownify >}}
|
||||
|
@@ -95,6 +95,7 @@ weight: 813
|
||||
9. 优化 - 工作流 handler 性能优化。
|
||||
10. 优化 - 工作流快捷键,避免调试测试时也会触发。
|
||||
11. 优化 - 流输出,切换 tab 时仍可以继续输出。
|
||||
12. 修复 - 知识库选择权限问题。
|
||||
13. 修复 - 空 chatId 发起对话,首轮携带用户选择时会异常。
|
||||
14. 修复 - createDataset 接口,intro 未赋值。
|
||||
12. 优化 - 完善外部文件知识库相关 API。
|
||||
13. 修复 - 知识库选择权限问题。
|
||||
14. 修复 - 空 chatId 发起对话,首轮携带用户选择时会异常。
|
||||
15. 修复 - createDataset 接口,intro 未赋值。
|
||||
|
@@ -45,7 +45,10 @@ export async function createOneCollection({
|
||||
[key: string]: any;
|
||||
session?: ClientSession;
|
||||
}) {
|
||||
// Create collection tags
|
||||
const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });
|
||||
|
||||
// Create collection
|
||||
const [collection] = await MongoDatasetCollection.create(
|
||||
[
|
||||
{
|
||||
|
@@ -111,6 +111,17 @@ try {
|
||||
DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, tags: 1 });
|
||||
// create time filter
|
||||
DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, createTime: 1 });
|
||||
|
||||
// Get collection by external file id
|
||||
DatasetCollectionSchema.index(
|
||||
{ datasetId: 1, externalFileId: 1 },
|
||||
{
|
||||
unique: true,
|
||||
partialFilterExpression: {
|
||||
externalFileId: { $exists: true }
|
||||
}
|
||||
}
|
||||
);
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
|
@@ -1,6 +1,5 @@
|
||||
import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type.d';
|
||||
import { MongoDatasetCollection } from './schema';
|
||||
import type { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type.d';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { MongoDatasetTraining } from '../training/schema';
|
||||
import { urlsFetch } from '../../../common/string/cheerio';
|
||||
@@ -12,6 +11,7 @@ import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { ClientSession } from '../../../common/mongo';
|
||||
import { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api';
|
||||
import { MongoDatasetCollectionTags } from '../tag/schema';
|
||||
import { readFromSecondary } from '../../../common/mongo/utils';
|
||||
|
||||
/**
|
||||
* get all collection by top collectionId
|
||||
@@ -160,7 +160,7 @@ export const reloadCollectionChunks = async ({
|
||||
const { chunks } = splitText2Chunks({
|
||||
text: newRawText,
|
||||
chunkLen: col.chunkSize || 512,
|
||||
customReg: col.chunkSplitter ? [col.chunkSplitter] : [],
|
||||
customReg: col.chunkSplitter ? [col.chunkSplitter] : []
|
||||
});
|
||||
|
||||
// insert to training queue
|
||||
@@ -204,7 +204,7 @@ export const reloadCollectionChunks = async ({
|
||||
};
|
||||
|
||||
export const createOrGetCollectionTags = async ({
|
||||
tags = [],
|
||||
tags,
|
||||
datasetId,
|
||||
teamId,
|
||||
session
|
||||
@@ -213,13 +213,20 @@ export const createOrGetCollectionTags = async ({
|
||||
datasetId: string;
|
||||
teamId: string;
|
||||
session?: ClientSession;
|
||||
}): Promise<string[]> => {
|
||||
if (!tags.length) return [];
|
||||
const existingTags = await MongoDatasetCollectionTags.find({
|
||||
teamId,
|
||||
datasetId,
|
||||
$expr: { $in: ['$tag', tags] }
|
||||
});
|
||||
}) => {
|
||||
if (!tags) return undefined;
|
||||
|
||||
if (tags.length === 0) return [];
|
||||
|
||||
const existingTags = await MongoDatasetCollectionTags.find(
|
||||
{
|
||||
teamId,
|
||||
datasetId,
|
||||
tag: { $in: tags }
|
||||
},
|
||||
undefined,
|
||||
{ session }
|
||||
).lean();
|
||||
|
||||
const existingTagContents = existingTags.map((tag) => tag.tag);
|
||||
const newTagContents = tags.filter((tag) => !existingTagContents.includes(tag));
|
||||
@@ -235,3 +242,29 @@ export const createOrGetCollectionTags = async ({
|
||||
|
||||
return [...existingTags.map((tag) => tag._id), ...newTags.map((tag) => tag._id)];
|
||||
};
|
||||
|
||||
export const collectionTagsToTagLabel = async ({
|
||||
datasetId,
|
||||
tags
|
||||
}: {
|
||||
datasetId: string;
|
||||
tags?: string[];
|
||||
}) => {
|
||||
if (!tags) return undefined;
|
||||
if (tags.length === 0) return;
|
||||
|
||||
// Get all the tags
|
||||
const collectionTags = await MongoDatasetCollectionTags.find({ datasetId }, undefined, {
|
||||
...readFromSecondary
|
||||
}).lean();
|
||||
const tagsMap = new Map<string, string>();
|
||||
collectionTags.forEach((tag) => {
|
||||
tagsMap.set(String(tag._id), tag.tag);
|
||||
});
|
||||
|
||||
return tags
|
||||
.map((tag) => {
|
||||
return tagsMap.get(tag) || '';
|
||||
})
|
||||
.filter(Boolean);
|
||||
};
|
||||
|
@@ -21,6 +21,8 @@ export type DatasetCollectionsListItemType = {
|
||||
trainingType?: DatasetCollectionSchemaType['trainingType'];
|
||||
tags?: string[];
|
||||
|
||||
externalFileId?: string;
|
||||
|
||||
fileId?: string;
|
||||
rawLink?: string;
|
||||
permission: DatasetPermission;
|
||||
|
@@ -10,6 +10,7 @@ import { NextAPI } from '@/service/middleware/entry';
|
||||
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
|
||||
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
|
||||
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
|
||||
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
|
||||
|
||||
async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType> {
|
||||
const { id } = req.query as { id: string };
|
||||
@@ -35,6 +36,10 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
|
||||
return {
|
||||
...collection,
|
||||
...getCollectionSourceData(collection),
|
||||
tags: await collectionTagsToTagLabel({
|
||||
datasetId: collection.datasetId._id,
|
||||
tags: collection.tags
|
||||
}),
|
||||
permission,
|
||||
file
|
||||
};
|
||||
|
@@ -11,6 +11,8 @@ import { startTrainingQueue } from '@/service/core/dataset/training/utils';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
|
||||
import { PagingData } from '@/types';
|
||||
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
|
||||
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
|
||||
|
||||
async function handler(req: NextApiRequest): Promise<PagingData<DatasetCollectionsListItemType>> {
|
||||
let {
|
||||
@@ -60,12 +62,15 @@ async function handler(req: NextApiRequest): Promise<PagingData<DatasetCollectio
|
||||
trainingType: 1,
|
||||
fileId: 1,
|
||||
rawLink: 1,
|
||||
tags: 1
|
||||
tags: 1,
|
||||
externalFileId: 1
|
||||
};
|
||||
|
||||
// not count data amount
|
||||
if (simple) {
|
||||
const collections = await MongoDatasetCollection.find(match)
|
||||
const collections = await MongoDatasetCollection.find(match, undefined, {
|
||||
...readFromSecondary
|
||||
})
|
||||
.select(selectField)
|
||||
.sort({
|
||||
updateTime: -1
|
||||
@@ -78,6 +83,10 @@ async function handler(req: NextApiRequest): Promise<PagingData<DatasetCollectio
|
||||
data: await Promise.all(
|
||||
collections.map(async (item) => ({
|
||||
...item,
|
||||
tags: await collectionTagsToTagLabel({
|
||||
datasetId,
|
||||
tags: item.tags
|
||||
}),
|
||||
dataAmount: 0,
|
||||
trainingAmount: 0,
|
||||
permission
|
||||
@@ -153,12 +162,18 @@ async function handler(req: NextApiRequest): Promise<PagingData<DatasetCollectio
|
||||
}
|
||||
}
|
||||
]),
|
||||
MongoDatasetCollection.countDocuments(match)
|
||||
MongoDatasetCollection.countDocuments(match, {
|
||||
...readFromSecondary
|
||||
})
|
||||
]);
|
||||
|
||||
const data = await Promise.all(
|
||||
collections.map(async (item) => ({
|
||||
...item,
|
||||
tags: await collectionTagsToTagLabel({
|
||||
datasetId,
|
||||
tags: item.tags
|
||||
}),
|
||||
permission
|
||||
}))
|
||||
);
|
||||
|
@@ -1,5 +1,8 @@
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
import { getCollectionUpdateTime } from '@fastgpt/service/core/dataset/collection/utils';
|
||||
import {
|
||||
createOrGetCollectionTags,
|
||||
getCollectionUpdateTime
|
||||
} from '@fastgpt/service/core/dataset/collection/utils';
|
||||
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
|
||||
@@ -11,11 +14,16 @@ import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
|
||||
export type UpdateDatasetCollectionParams = {
|
||||
id: string;
|
||||
id?: string;
|
||||
parentId?: string;
|
||||
name?: string;
|
||||
tags?: string[];
|
||||
tags?: string[]; // Not tag id, is tag label
|
||||
forbid?: boolean;
|
||||
createTime?: Date;
|
||||
|
||||
// External file id
|
||||
datasetId?: string;
|
||||
externalFileId?: string;
|
||||
};
|
||||
|
||||
// Set folder collection children forbid status
|
||||
@@ -65,14 +73,22 @@ const updateFolderChildrenForbid = async ({
|
||||
};
|
||||
|
||||
async function handler(req: ApiRequestProps<UpdateDatasetCollectionParams>) {
|
||||
const { id, parentId, name, tags, forbid } = req.body;
|
||||
let { datasetId, externalFileId, id, parentId, name, tags, forbid, createTime } = req.body;
|
||||
|
||||
if (datasetId && externalFileId) {
|
||||
const collection = await MongoDatasetCollection.findOne({ datasetId, externalFileId }, '_id');
|
||||
if (!collection) {
|
||||
return Promise.reject(CommonErrEnum.fileNotFound);
|
||||
}
|
||||
id = collection._id;
|
||||
}
|
||||
|
||||
if (!id) {
|
||||
return Promise.reject(CommonErrEnum.missingParams);
|
||||
}
|
||||
|
||||
// 凭证校验
|
||||
const { collection } = await authDatasetCollection({
|
||||
const { collection, teamId } = await authDatasetCollection({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
@@ -81,6 +97,13 @@ async function handler(req: ApiRequestProps<UpdateDatasetCollectionParams>) {
|
||||
});
|
||||
|
||||
await mongoSessionRun(async (session) => {
|
||||
const collectionTags = await createOrGetCollectionTags({
|
||||
tags,
|
||||
teamId,
|
||||
datasetId: collection.datasetId._id,
|
||||
session
|
||||
});
|
||||
|
||||
await MongoDatasetCollection.updateOne(
|
||||
{
|
||||
_id: id
|
||||
@@ -89,8 +112,9 @@ async function handler(req: ApiRequestProps<UpdateDatasetCollectionParams>) {
|
||||
$set: {
|
||||
...(parentId !== undefined && { parentId: parentId || null }),
|
||||
...(name && { name, updateTime: getCollectionUpdateTime({ name }) }),
|
||||
...(tags && { tags }),
|
||||
...(forbid !== undefined && { forbid })
|
||||
...(collectionTags !== undefined && { tags: collectionTags }),
|
||||
...(forbid !== undefined && { forbid }),
|
||||
...(createTime !== undefined && { createTime })
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@@ -35,8 +35,8 @@ const TagsPopOver = ({
|
||||
const tagList = useMemo(
|
||||
() =>
|
||||
(collectionTags
|
||||
?.map((tagId) => {
|
||||
const tagObject = allDatasetTags.find((tag) => tag._id === tagId);
|
||||
?.map((item) => {
|
||||
const tagObject = allDatasetTags.find((tag) => tag.tag === item);
|
||||
return tagObject ? { _id: tagObject._id, tag: tagObject.tag } : null;
|
||||
})
|
||||
.filter((tag) => tag !== null) as {
|
||||
@@ -153,9 +153,9 @@ const TagsPopOver = ({
|
||||
setIsUpdateLoading(true);
|
||||
await putDatasetCollectionById({
|
||||
id: currentCollection._id,
|
||||
tags: checkedTags.map((tag) => tag._id)
|
||||
tags: checkedTags.map((tag) => tag.tag)
|
||||
});
|
||||
setCollectionTags(checkedTags.map((tag) => tag._id));
|
||||
setCollectionTags(checkedTags.map((tag) => tag.tag));
|
||||
setIsUpdateLoading(false);
|
||||
}}
|
||||
display={showTagManage || overflowTags.length > 0 ? 'block' : 'none'}
|
||||
|
@@ -1,10 +1,9 @@
|
||||
import React, { useState, useRef, useMemo } from 'react';
|
||||
import React, { useState, useMemo } from 'react';
|
||||
import { Box, Card, IconButton, Flex, Button, useTheme } from '@chakra-ui/react';
|
||||
import {
|
||||
getDatasetDataList,
|
||||
delOneDatasetDataById,
|
||||
getDatasetCollectionById,
|
||||
putDatasetDataById
|
||||
getDatasetCollectionById
|
||||
} from '@/web/core/dataset/api';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
@@ -21,7 +20,6 @@ import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection
|
||||
import EmptyTip from '@fastgpt/web/components/common/EmptyTip';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import MyTag from '@fastgpt/web/components/common/Tag/index';
|
||||
import MyBox from '@fastgpt/web/components/common/MyBox';
|
||||
import { useSystem } from '@fastgpt/web/hooks/useSystem';
|
||||
@@ -96,24 +94,27 @@ const DataCard = () => {
|
||||
<Flex flexDirection={'column'} h={'100%'}>
|
||||
{/* Header */}
|
||||
<Flex alignItems={'center'} px={6}>
|
||||
<Flex className="textEllipsis" flex={'1 0 0'} mr={[3, 5]} alignItems={'center'}>
|
||||
<Box>
|
||||
<Box alignItems={'center'} gap={2} display={isPc ? 'flex' : ''}>
|
||||
{collection?._id && (
|
||||
<RawSourceBox
|
||||
collectionId={collection._id}
|
||||
{...getCollectionSourceData(collection)}
|
||||
fontSize={['sm', 'md']}
|
||||
color={'black'}
|
||||
textDecoration={'none'}
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
{feConfigs?.isPlus && !!collection?.tags?.length && (
|
||||
<TagsPopOver currentCollection={collection} />
|
||||
<Box flex={'1 0 0'} mr={[3, 5]} alignItems={'center'}>
|
||||
<Box
|
||||
className="textEllipsis"
|
||||
alignItems={'center'}
|
||||
gap={2}
|
||||
display={isPc ? 'flex' : ''}
|
||||
>
|
||||
{collection?._id && (
|
||||
<RawSourceBox
|
||||
collectionId={collection._id}
|
||||
{...getCollectionSourceData(collection)}
|
||||
fontSize={['sm', 'md']}
|
||||
color={'black'}
|
||||
textDecoration={'none'}
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
</Flex>
|
||||
{feConfigs?.isPlus && !!collection?.tags?.length && (
|
||||
<TagsPopOver currentCollection={collection} />
|
||||
)}
|
||||
</Box>
|
||||
{canWrite && (
|
||||
<Box>
|
||||
<Button
|
||||
|
@@ -243,8 +243,9 @@ const Info = ({ datasetId }: { datasetId: string }) => {
|
||||
const vectorModel = vectorModelList.find((item) => item.model === e);
|
||||
if (!vectorModel) return;
|
||||
return onOpenConfirmRebuild(() => {
|
||||
setValue('vectorModel', vectorModel);
|
||||
return onRebuilding(vectorModel);
|
||||
return onRebuilding(vectorModel).then(() => {
|
||||
setValue('vectorModel', vectorModel);
|
||||
});
|
||||
})();
|
||||
}}
|
||||
/>
|
||||
|
Reference in New Issue
Block a user