feat: Text check before synchronization (#689)

* fix: icon

* fix: web selector

* fix: web selector

* perf: link sync

* dev doc

* chomd doc

* perf: git intro

* 466 intro

* intro img

* add json editor (#5)

* team limit

* websync limit

* json editor

* text editor

* perf: search test

* change cq value type

* doc

* intro img

---------

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
Archer
2024-01-04 23:19:24 +08:00
committed by GitHub
parent c2abbb579f
commit 828829011a
64 changed files with 1789 additions and 1489 deletions

View File

@@ -1,14 +1,12 @@
### Fast GPT V4.6.6
1. 新增 - [问题补全模块](https://doc.fastgpt.in/docs/workflow/modules/coreferenceresolution/)
2. 新增 - [文本编辑模块](https://doc.fastgpt.in/docs/workflow/modules/text_editor/)
3. 新增 - [判断器模块](https://doc.fastgpt.in/docs/workflow/modules/tfswitch/)
4. 新增 - [自定义反馈模块](https://doc.fastgpt.in/docs/workflow/modules/custom_feedback/)
5. 新增 - 【内容提取】模块支持选择模型,以及字段枚举
6. 优化 - docx 读取兼容表格,表格转 markdown
7. 优化 - 高级编排连接线交互
8. 优化 - 由于 html2md 导致的 cpu密集计算阻断线程问题
9. 修复 - 高级编排提示词提取描述
10. [使用文档](https://doc.fastgpt.in/docs/intro/)
11. [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow)
12. [点击查看商业版](https://doc.fastgpt.in/docs/commercial/)
1. 新增 - Http 模块请求头支持 Json 编辑器。
2. 新增 - 搜索方式:分离向量语义检索,全文检索和重排,通过 RRF 进行排序合并。
3. 新增 - [问题补全模块](https://doc.fastgpt.in/docs/workflow/modules/coreferenceresolution/)
4. 新增 - [文本编辑模块](https://doc.fastgpt.in/docs/workflow/modules/text_editor/)
5. 新增 - [判断器模块](https://doc.fastgpt.in/docs/workflow/modules/tfswitch/)
6. 新增 - [自定义反馈模块](https://doc.fastgpt.in/docs/workflow/modules/custom_feedback/)
7. 新增 - 【内容提取】模块支持选择模型,以及字段枚举
8. [使用文档](https://doc.fastgpt.in/docs/intro/)
9. [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow)
10. [点击查看商业版](https://doc.fastgpt.in/docs/commercial/)

View File

@@ -410,6 +410,7 @@
"Read Metadata": "Read Metadata",
"Training Type": "Training Type",
"Updatetime": "Update Time",
"Web page selector": "Web Selector",
"metadata": "Metadata",
"read source": "Read Source",
"source": "Source",
@@ -420,6 +421,12 @@
"active": "Ready",
"syncing": "Syncing"
},
"sync": {
"result": {
"sameRaw": "The content has not changed and no update is required.",
"success": "Start synchronization"
}
},
"training": {
"type chunk": "Chunk",
"type manual": "Manual",

View File

@@ -410,6 +410,7 @@
"Read Metadata": "查看元数据",
"Training Type": "训练模式",
"Updatetime": "更新时间",
"Web page selector": "网站选择器",
"metadata": "元数据",
"read source": "查看原始内容",
"source": "数据来源",
@@ -420,6 +421,12 @@
"active": "已就绪",
"syncing": "同步中"
},
"sync": {
"result": {
"sameRaw": "内容未变动,无需更新",
"success": "开始同步"
}
},
"training": {
"type chunk": "直接分段",
"type manual": "手动",
@@ -515,9 +522,13 @@
},
"score": {
"embedding": "语义检索",
"embedding desc": "通过计算向量之间的距离获取得分,范围为 0~1。",
"fullText": "全文检索",
"fullText desc": "计算相同关键词的得分,范围为 0~无穷。",
"reRank": "结果重排",
"rrf": "RRF 合并"
"reRank desc": "通过 ReRank 模型计算句子之间的关联度,范围为 0~1。",
"rrf": "综合排名",
"rrf desc": "通过倒排计算的方式,合并多个检索结果。"
},
"search mode": "搜索模式"
},

View File

@@ -28,14 +28,14 @@ const NavbarPhone = ({ unread }: { unread: number }) => {
},
{
label: t('navbar.Tools'),
icon: 'phoneTabbar/tabbarMore',
icon: 'phoneTabbar/more',
link: '/tools',
activeLink: ['/tools'],
unread: 0
},
{
label: t('navbar.Account'),
icon: 'phoneTabbar/tabbarMe',
icon: 'phoneTabbar/me',
link: '/account',
activeLink: ['/account'],
unread

View File

@@ -1,5 +1,5 @@
import React, { useMemo, useState } from 'react';
import { Box, Flex, Link, Progress, useTheme } from '@chakra-ui/react';
import { Box, Flex, Link, Progress } from '@chakra-ui/react';
import {
type InputDataType,
RawSourceText
@@ -9,7 +9,6 @@ import NextLink from 'next/link';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import MyTooltip from '@/components/MyTooltip';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import dynamic from 'next/dynamic';
import MyBox from '@/components/common/MyBox';
import { getDatasetDataItemById } from '@/web/core/dataset/api';
@@ -19,6 +18,36 @@ import { SearchScoreTypeEnum, SearchScoreTypeMap } from '@fastgpt/global/core/da
const InputDataModal = dynamic(() => import('@/pages/dataset/detail/components/InputDataModal'));
type ScoreItemType = SearchDataResponseItemType['score'][0];
const scoreTheme: Record<
string,
{
color: string;
bg: string;
borderColor: string;
colorSchema: string;
}
> = {
'0': {
color: '#6F5DD7',
bg: '#F0EEFF',
borderColor: '#D3CAFF',
colorSchema: 'purple'
},
'1': {
color: '#9E53C1',
bg: '#FAF1FF',
borderColor: '#ECF',
colorSchema: 'pink'
},
'2': {
color: '#0884DD',
bg: '#F0FBFF',
borderColor: '#BCE7FF',
colorSchema: 'blue'
}
};
const QuoteItem = ({
quoteItem,
canViewSource,
@@ -29,8 +58,6 @@ const QuoteItem = ({
linkToDataset?: boolean;
}) => {
const { t } = useTranslation();
const { isPc } = useSystemStore();
const theme = useTheme();
const [editInputData, setEditInputData] = useState<InputDataType & { collectionId: string }>();
const { mutate: onclickEdit, isLoading } = useRequest({
@@ -43,54 +70,46 @@ const QuoteItem = ({
errorToast: t('core.dataset.data.get data error')
});
const rank = useMemo(() => {
if (quoteItem.score.length === 1) {
return quoteItem.score[0].index;
}
const rrf = quoteItem.score?.find((item) => item.type === SearchScoreTypeEnum.rrf);
if (rrf) return rrf.index;
return 0;
}, [quoteItem.score]);
const score = useMemo(() => {
let searchScore: number | undefined = undefined;
let text = '';
const reRankScore = quoteItem.score?.find((item) => item.type === SearchScoreTypeEnum.reRank);
if (reRankScore) {
searchScore = reRankScore.value;
text = t('core.dataset.search.Rerank score');
if (!Array.isArray(quoteItem.score)) {
return {
primaryScore: undefined,
secondaryScore: []
};
}
const embScore = quoteItem.score?.find((item) => item.type === SearchScoreTypeEnum.embedding);
if (embScore && quoteItem.score.length === 1) {
searchScore = embScore.value;
text = t('core.dataset.search.Embedding score');
}
// rrf -> rerank -> embedding -> fullText 优先级
let rrfScore: ScoreItemType | undefined = undefined;
let reRankScore: ScoreItemType | undefined = undefined;
let embeddingScore: ScoreItemType | undefined = undefined;
let fullTextScore: ScoreItemType | undefined = undefined;
const detailScore = (() => {
if (Array.isArray(quoteItem.score)) {
return quoteItem.score
.map(
(item) =>
`${t('core.dataset.search.Search type')}: ${t(SearchScoreTypeMap[item.type]?.label)}
${t('core.dataset.search.Rank')}: ${item.index + 1}
${t('core.dataset.search.Score')}: ${item.value.toFixed(4)}`
)
.join('\n----\n');
quoteItem.score.forEach((item) => {
if (item.type === SearchScoreTypeEnum.rrf) {
rrfScore = item;
} else if (item.type === SearchScoreTypeEnum.reRank) {
reRankScore = item;
} else if (item.type === SearchScoreTypeEnum.embedding) {
embeddingScore = item;
} else if (item.type === SearchScoreTypeEnum.fullText) {
fullTextScore = item;
}
return 'null';
})();
});
const primaryScore = (rrfScore ||
reRankScore ||
embeddingScore ||
fullTextScore) as unknown as ScoreItemType;
const secondaryScore = [rrfScore, reRankScore, embeddingScore, fullTextScore].filter(
// @ts-ignore
(item) => item && primaryScore && item.type !== primaryScore.type
) as unknown as ScoreItemType[];
return {
value: searchScore,
tip: t('core.dataset.Search score tip', {
scoreText: text ? `${text}\n` : text,
detailScore
})
primaryScore,
secondaryScore
};
}, [quoteItem.score, t]);
}, [quoteItem.score]);
return (
<>
@@ -101,86 +120,97 @@ ${t('core.dataset.search.Score')}: ${item.value.toFixed(4)}`
fontSize={'sm'}
whiteSpace={'pre-wrap'}
_hover={{ '& .hover-data': { display: 'flex' } }}
h={'100%'}
display={'flex'}
flexDirection={'column'}
>
<Flex alignItems={'flex-end'} mb={3}>
{rank !== undefined && (
<MyTooltip label={t('core.dataset.search.Rank Tip')}>
<Box px={2} py={'3px'} mr={3} bg={'myGray.200'} borderRadius={'md'}>
# {rank + 1}
</Box>
<Flex alignItems={'center'} mb={3}>
{score?.primaryScore && (
<MyTooltip label={t(SearchScoreTypeMap[score.primaryScore.type]?.desc)}>
<Flex
px={'12px'}
py={'5px'}
mr={4}
borderRadius={'md'}
color={'primary.700'}
bg={'primary.50'}
borderWidth={'1px'}
borderColor={'primary.200'}
alignItems={'center'}
fontSize={'sm'}
>
<Box>#{score.primaryScore.index + 1}</Box>
<Box borderRightColor={'primary.700'} borderRightWidth={'1px'} h={'14px'} mx={2} />
<Box>
{t(SearchScoreTypeMap[score.primaryScore.type]?.label)}
{SearchScoreTypeMap[score.primaryScore.type]?.showScore
? ` ${score.primaryScore.value.toFixed(4)}`
: ''}
</Box>
</Flex>
</MyTooltip>
)}
<RawSourceText
fontWeight={'bold'}
color={'black'}
sourceName={quoteItem.sourceName}
sourceId={quoteItem.sourceId}
canView={canViewSource}
/>
<Box flex={1} />
{linkToDataset && (
<Link
as={NextLink}
className="hover-data"
display={'none'}
alignItems={'center'}
color={'primary.500'}
href={`/dataset/detail?datasetId=${quoteItem.datasetId}&currentTab=dataCard&collectionId=${quoteItem.collectionId}`}
>
{t('core.dataset.Go Dataset')}
<MyIcon name={'common/rightArrowLight'} w={'10px'} />
</Link>
)}
{score.secondaryScore.map((item, i) => (
<MyTooltip key={item.type} label={t(SearchScoreTypeMap[item.type]?.desc)}>
<Box fontSize={'xs'} mr={3}>
<Flex alignItems={'flex-start'} lineHeight={1.2} mb={1}>
<Box
px={'5px'}
borderWidth={'1px'}
borderRadius={'sm'}
mr={1}
{...(scoreTheme[i] && scoreTheme[i])}
>
<Box transform={'scale(0.9)'}>#{item.index + 1}</Box>
</Box>
<Box transform={'scale(0.9)'}>
{t(SearchScoreTypeMap[item.type]?.label)}: {item.value.toFixed(4)}
</Box>
</Flex>
<Box h={'4px'}>
{SearchScoreTypeMap[item.type]?.showScore && (
<Progress
value={item.value * 100}
h={'4px'}
w={'100%'}
size="sm"
borderRadius={'20px'}
colorScheme={scoreTheme[i]?.colorSchema}
bg="#E8EBF0"
/>
)}
</Box>
</Box>
</MyTooltip>
))}
</Flex>
<Box color={'black'}>{quoteItem.q}</Box>
<Box color={'myGray.600'}>{quoteItem.a}</Box>
<Box flex={'1 0 0'}>
<Box color={'black'}>{quoteItem.q}</Box>
<Box color={'myGray.600'}>{quoteItem.a}</Box>
</Box>
{canViewSource && (
<Flex alignItems={'center'} mt={3} gap={4} color={'myGray.500'} fontSize={'xs'}>
{isPc && (
<Flex border={theme.borders.base} px={3} borderRadius={'xs'} lineHeight={'16px'}>
ID: {quoteItem.id}
</Flex>
)}
<MyTooltip label={t('core.dataset.Quote Length')}>
<Flex alignItems={'center'}>
<MyIcon name="common/text/t" w={'14px'} mr={1} color={'myGray.500'} />
{quoteItem.q.length + (quoteItem.a?.length || 0)}
</Flex>
</MyTooltip>
{canViewSource && score && (
<MyTooltip label={score.tip}>
<Flex alignItems={'center'}>
<MyIcon name={'kbTest'} w={'12px'} />
{score.value ? (
<>
<Progress
mx={2}
w={['60px', '90px']}
value={score?.value * 100}
size="sm"
borderRadius={'20px'}
colorScheme="myGray"
border={theme.borders.base}
/>
<Box>{score?.value.toFixed(4)}</Box>
</>
) : (
<Box ml={1} cursor={'pointer'}>
{t('core.dataset.search.Read score')}
</Box>
)}
</Flex>
</MyTooltip>
)}
<RawSourceText
fontWeight={'bold'}
color={'black'}
sourceName={quoteItem.sourceName}
sourceId={quoteItem.sourceId}
canView={canViewSource}
/>
<Box flex={1} />
{quoteItem.id && (
<MyTooltip label={t('core.dataset.data.Edit')}>
<Box
className="hover-data"
display={['flex', 'none']}
bg={'rgba(255,255,255,0.9)'}
alignItems={'center'}
justifyContent={'center'}
boxShadow={'-10px 0 10px rgba(255,255,255,1)'}
@@ -199,6 +229,19 @@ ${t('core.dataset.search.Score')}: ${item.value.toFixed(4)}`
</Box>
</MyTooltip>
)}
{linkToDataset && (
<Link
as={NextLink}
className="hover-data"
display={'none'}
alignItems={'center'}
color={'primary.500'}
href={`/dataset/detail?datasetId=${quoteItem.datasetId}&currentTab=dataCard&collectionId=${quoteItem.collectionId}`}
>
{t('core.dataset.Go Dataset')}
<MyIcon name={'common/rightArrowLight'} w={'10px'} />
</Link>
)}
</Flex>
)}
</MyBox>

View File

@@ -94,7 +94,7 @@ const NodeCQNode = React.memo(function NodeCQNode({ data }: { data: FlowModuleIt
/>
<SourceHandle
handleKey={item.key}
valueType={ModuleIOValueTypeEnum.string}
valueType={ModuleIOValueTypeEnum.boolean}
/>
</Box>
</Box>

View File

@@ -64,6 +64,10 @@ const RenderList: {
{
types: [FlowNodeInputTypeEnum.addInputParam],
Component: dynamic(() => import('./templates/AddInputParam'))
},
{
types: [FlowNodeInputTypeEnum.JSONEditor],
Component: dynamic(() => import('./templates/JsonEditor'))
}
];
const UserChatInput = dynamic(() => import('./templates/UserChatInput'));

View File

@@ -0,0 +1,39 @@
import React, { useCallback } from 'react';
import type { RenderInputProps } from '../type';
import { onChangeNode } from '../../../../FlowProvider';
import { useTranslation } from 'next-i18next';
import JSONEditor from '@fastgpt/web/components/common/Textarea/JsonEditor';
const JsonEditor = ({ item, moduleId }: RenderInputProps) => {
const { t } = useTranslation();
const update = useCallback(
(value: string) => {
onChangeNode({
moduleId,
type: 'updateInput',
key: item.key,
value: {
...item,
value
}
});
},
[item, moduleId]
);
return (
<JSONEditor
title={t(item.label)}
bg={'myWhite.400'}
placeholder={t(item.placeholder || '')}
resize
defaultValue={item.value}
onChange={(e) => {
update(e);
}}
/>
);
};
export default React.memo(JsonEditor);

View File

@@ -50,7 +50,8 @@ const defaultFeConfigs: FastGPTFeConfigsType = {
concatMd:
'* 项目开源地址: [FastGPT GitHub](https://github.com/labring/FastGPT)\n* 交流群: ![](https://doc.fastgpt.in/wechat-fastgpt.webp)',
limit: {
exportLimitMinutes: 0
exportDatasetLimitMinutes: 0,
websiteSyncLimitMinuted: 0
},
scripts: [],
favicon: '/favicon.ico'

View File

@@ -1,73 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { addLog } from '@fastgpt/service/common/system/log';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';

/**
 * API route: pre-check whether the current user is allowed to export a dataset.
 * Responds 200 when both the per-user rate limit and the row-count limit pass;
 * responds 500 with the rejection reason otherwise.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    let { datasetId } = req.query as {
      datasetId: string;
    };
    if (!datasetId) {
      throw new Error('缺少参数');
    }
    // Credential check: requires write permission on the dataset
    const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
    await limitCheck({
      datasetId,
      userId
    });
    jsonRes(res);
  } catch (err) {
    res.status(500);
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

/**
 * Rejects when the user exported within the configured cooldown window, or
 * when the dataset tree holds more than 100k rows; resolves when allowed.
 */
export async function limitCheck({ datasetId, userId }: { datasetId: string; userId: string }) {
  // Collect every dataset id in the folder tree rooted at datasetId
  const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);
  // Earliest allowed previous-export timestamp (now minus cooldown window)
  const limitMinutesAgo = new Date(
    Date.now() - (global.feConfigs?.limit?.exportLimitMinutes || 0) * 60 * 1000
  );
  // auth export times: query matches only when the last export is absent or old enough
  const authTimes = await MongoUser.findOne(
    {
      _id: userId,
      $or: [
        { 'limit.exportKbTime': { $exists: false } },
        { 'limit.exportKbTime': { $lte: limitMinutesAgo } }
      ]
    },
    '_id limit'
  );
  if (!authTimes) {
    const minutes = `${global.feConfigs?.limit?.exportLimitMinutes || 0} 分钟`;
    return Promise.reject(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`);
  }
  // auth max data: cap exports at 100k rows across the whole dataset tree
  const total = await MongoDatasetData.countDocuments({
    datasetId: { $in: exportIds }
  });
  addLog.info(`export datasets: ${datasetId}`, { total });
  if (total > 100000) {
    return Promise.reject('数据量超出 10 万,无法导出');
  }
}

View File

@@ -2,14 +2,20 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { loadingOneChunkCollection } from '@fastgpt/service/core/dataset/collection/utils';
import {
getCollectionAndRawText,
reloadCollectionChunks
} from '@fastgpt/service/core/dataset/collection/utils';
import { delCollectionRelevantData } from '@fastgpt/service/core/dataset/data/controller';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
import {
DatasetCollectionSyncResultEnum,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constant';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -32,6 +38,18 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
return Promise.reject(DatasetErrEnum.unLinkCollection);
}
const { rawText, isSameRawText } = await getCollectionAndRawText({
collection
});
if (isSameRawText) {
return jsonRes(res, {
data: DatasetCollectionSyncResultEnum.sameRaw
});
}
/* Not the same original text, create and reload */
const vectorModelData = getVectorModel(collection.datasetId.vectorModel);
const agentModelData = getQAModel(collection.datasetId.agentModel);
// create training bill
@@ -45,26 +63,27 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
});
// create a collection and delete old
const { _id } = await MongoDatasetCollection.create({
parentId: collection.parentId,
const _id = await createOneCollection({
teamId: collection.teamId,
tmbId: collection.tmbId,
parentId: collection.parentId,
datasetId: collection.datasetId._id,
type: collection.type,
name: collection.name,
createTime: collection.createTime,
type: collection.type,
trainingType: collection.trainingType,
chunkSize: collection.chunkSize,
fileId: collection.fileId,
rawLink: collection.rawLink,
metadata: collection.metadata
metadata: collection.metadata,
createTime: collection.createTime
});
// start load
await loadingOneChunkCollection({
await reloadCollectionChunks({
collectionId: _id,
tmbId,
billId
billId,
rawText
});
// delete old collection
@@ -73,7 +92,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
fileIds: collection.fileId ? [collection.fileId] : []
});
jsonRes(res);
jsonRes(res, {
data: DatasetCollectionSyncResultEnum.success
});
} catch (err) {
jsonRes(res, {
code: 500,

View File

@@ -1,13 +1,15 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes, responseWriteController } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { addLog } from '@fastgpt/service/common/system/log';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';
import { limitCheck } from './checkExportLimit';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import {
checkExportDatasetLimit,
updateExportDatasetLimit
} from '@fastgpt/service/support/user/utils';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -21,11 +23,11 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
}
// 凭证校验
const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
const { teamId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
await limitCheck({
userId,
datasetId
await checkExportDatasetLimit({
teamId,
limitMinutes: global.feConfigs?.limit?.exportDatasetLimitMinutes
});
const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);
@@ -43,7 +45,9 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
datasetId: { $in: exportIds }
},
'q a'
).cursor();
)
.limit(50000)
.cursor();
const write = responseWriteController({
res,
@@ -59,12 +63,10 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
write(`\n"${q}","${a}"`);
});
cursor.on('end', async () => {
cursor.on('end', () => {
cursor.close();
res.end();
await MongoUser.findByIdAndUpdate(userId, {
'limit.exportKbTime': new Date()
});
updateExportDatasetLimit(teamId);
});
cursor.on('error', (err) => {

View File

@@ -0,0 +1,34 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { checkExportDatasetLimit } from '@fastgpt/service/support/user/utils';

/**
 * API route: pre-check whether the team may export the given dataset.
 * Responds 200 when the export rate limit passes; 500 with the error otherwise.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    const datasetId = (req.query as { datasetId: string }).datasetId;
    if (!datasetId) {
      throw new Error('datasetId is required');
    }

    // Credential check: requires write permission on the dataset
    const { teamId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });

    await checkExportDatasetLimit({
      teamId,
      limitMinutes: global.feConfigs?.limit?.exportDatasetLimitMinutes
    });

    return jsonRes(res);
  } catch (err) {
    res.status(500);
    return jsonRes(res, { code: 500, error: err });
  }
}

View File

@@ -0,0 +1,27 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { checkWebSyncLimit } from '@fastgpt/service/support/user/utils';
import { authCert } from '@fastgpt/service/support/permission/auth/common';

/**
 * API route: pre-check whether the team may trigger a website sync right now.
 * Responds 200 when the sync rate limit passes; 500 with the error otherwise.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    // Credential check: any authenticated token; only the team id is needed
    const { teamId } = await authCert({ req, authToken: true });

    const limitMinutes = global.feConfigs?.limit?.websiteSyncLimitMinuted;
    await checkWebSyncLimit({ teamId, limitMinutes });

    return jsonRes(res);
  } catch (err) {
    res.status(500);
    return jsonRes(res, { code: 500, error: err });
  }
}

View File

@@ -46,7 +46,8 @@ import {
DatasetCollectionTrainingModeEnum,
DatasetTypeEnum,
DatasetTypeMap,
DatasetStatusEnum
DatasetStatusEnum,
DatasetCollectionSyncResultMap
} from '@fastgpt/global/core/dataset/constant';
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import EditFolderModal, { useEditFolder } from '../../component/EditFolderModal';
@@ -61,6 +62,7 @@ import { useUserStore } from '@/web/support/user/useUserStore';
import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import { DatasetCollectionSyncResultEnum } from '../../../../../../../packages/global/core/dataset/constant';
const FileImportModal = dynamic(() => import('./Import/ImportModal'), {});
const WebSiteConfigModal = dynamic(() => import('./Import/WebsiteConfig'), {});
@@ -246,8 +248,12 @@ const CollectionCard = () => {
mutationFn: (collectionId: string) => {
return postLinkCollectionSync(collectionId);
},
onSuccess() {
onSuccess(res: DatasetCollectionSyncResultEnum) {
getData(pageNum);
toast({
status: 'success',
title: t(DatasetCollectionSyncResultMap[res]?.label)
});
},
errorToast: t('core.dataset.error.Start Sync Failed')
});

View File

@@ -121,46 +121,55 @@ const DataCard = () => {
[collection?.canWrite, userInfo?.team?.role]
);
const metadataList = useMemo(
() =>
collection
const metadataList = useMemo(() => {
if (!collection) return [];
const webSelector =
collection?.datasetId?.websiteConfig?.selector || collection?.metadata?.webPageSelector;
return [
{
label: t('core.dataset.collection.metadata.source'),
value: t(DatasetCollectionTypeMap[collection.type]?.name)
},
{
label: t('core.dataset.collection.metadata.source name'),
value: collection.file?.filename || collection?.rawLink || collection?.name
},
{
label: t('core.dataset.collection.metadata.source size'),
value: collection.file ? formatFileSize(collection.file.length) : '-'
},
{
label: t('core.dataset.collection.metadata.Createtime'),
value: formatTime2YMDHM(collection.createTime)
},
{
label: t('core.dataset.collection.metadata.Updatetime'),
value: formatTime2YMDHM(collection.updateTime)
},
{
label: t('core.dataset.collection.metadata.Raw text length'),
value: collection.rawTextLength ?? '-'
},
{
label: t('core.dataset.collection.metadata.Training Type'),
value: t(DatasetCollectionTrainingTypeMap[collection.trainingType]?.label)
},
{
label: t('core.dataset.collection.metadata.Chunk Size'),
value: collection.chunkSize || '-'
},
...(webSelector
? [
{
label: t('core.dataset.collection.metadata.source'),
value: t(DatasetCollectionTypeMap[collection.type]?.name)
},
{
label: t('core.dataset.collection.metadata.source name'),
value: collection.file?.filename || collection?.rawLink || collection?.name
},
{
label: t('core.dataset.collection.metadata.source size'),
value: collection.file ? formatFileSize(collection.file.length) : '-'
},
{
label: t('core.dataset.collection.metadata.Createtime'),
value: formatTime2YMDHM(collection.createTime)
},
{
label: t('core.dataset.collection.metadata.Updatetime'),
value: formatTime2YMDHM(collection.updateTime)
},
{
label: t('core.dataset.collection.metadata.Raw text length'),
value: collection.rawTextLength ?? '-'
},
{
label: t('core.dataset.collection.metadata.Training Type'),
value: t(DatasetCollectionTrainingTypeMap[collection.trainingType]?.label)
},
{
label: t('core.dataset.collection.metadata.Chunk Size'),
value: collection.chunkSize || '-'
label: t('core.dataset.collection.metadata.Web page selector'),
value: webSelector
}
]
: [],
[collection, t]
);
: [])
];
}, [collection, t]);
return (
<Box ref={BoxRef} position={'relative'} px={5} py={[1, 5]} h={'100%'} overflow={'overlay'}>

View File

@@ -41,6 +41,7 @@ export type FileItemType = {
type: DatasetCollectionTypeEnum.file | DatasetCollectionTypeEnum.link;
fileId?: string;
rawLink?: string;
metadata?: Record<string, any>;
};
export interface Props extends BoxProps {
@@ -232,7 +233,7 @@ const FileSelect = ({
// link fetch
const onUrlFetch = useCallback(
(e: UrlFetchResponse) => {
const result: FileItemType[] = e.map<FileItemType>(({ url, content }) => {
const result: FileItemType[] = e.map<FileItemType>(({ url, content, selector }) => {
const { chunks, tokens } = splitText2Chunks({
text: content,
chunkLen,
@@ -250,7 +251,10 @@ const FileSelect = ({
chunks: chunks.map((chunk) => ({
q: chunk,
a: ''
}))
})),
metadata: {
webPageSelector: selector
}
};
});
onPushFiles(result);

View File

@@ -156,19 +156,24 @@ const Provider = ({
return formatModelPrice2Read(totalTokens * inputPrice);
}, [inputPrice, mode, outputPrice, totalTokens]);
/* start upload data */
/*
start upload data
1. create training bill
2. create collection
3. upload chunks
*/
const { mutate: onclickUpload, isLoading: uploading } = useRequest({
mutationFn: async (props?: { prompt?: string }) => {
const { prompt } = props || {};
let totalInsertion = 0;
for await (const file of files) {
const chunks = file.chunks;
// create training bill
const billId = await postCreateTrainingBill({
name: t('dataset.collections.Create Training Data', { filename: file.filename }),
vectorModel,
agentModel
});
// create a file collection and training bill
const collectionId = await postDatasetCollection({
datasetId,
@@ -181,10 +186,12 @@ const Provider = ({
trainingType: collectionTrainingType,
qaPrompt: mode === TrainingModeEnum.qa ? prompt : '',
rawTextLength: file.rawText.length,
hashRawText: hashStr(file.rawText)
hashRawText: hashStr(file.rawText),
metadata: file.metadata
});
// upload data
// upload chunks
const chunks = file.chunks;
const { insertLen } = await chunksUpload({
collectionId,
billId,

View File

@@ -60,6 +60,7 @@ const Test = ({ datasetId }: { datasetId: string }) => {
const [inputType, setInputType] = useState<'text' | 'file'>('text');
const [datasetTestItem, setDatasetTestItem] = useState<SearchTestStoreItemType>();
const [refresh, setRefresh] = useState(false);
const [isFocus, setIsFocus] = useState(false);
const { File, onOpen } = useSelectFile({
fileType: '.csv',
multiple: false
@@ -169,7 +170,20 @@ const Test = ({ datasetId }: { datasetId: string }) => {
py={4}
borderRight={['none', theme.borders.base]}
>
<Box border={'2px solid'} borderColor={'primary.500'} p={3} mx={4} borderRadius={'md'}>
<Box
border={'2px solid'}
p={3}
mx={4}
borderRadius={'md'}
{...(isFocus
? {
borderColor: 'primary.500',
boxShadow: '0px 0px 0px 2.4px rgba(51, 112, 255, 0.15)'
}
: {
borderColor: 'primary.300'
})}
>
{/* header */}
<Flex alignItems={'center'} justifyContent={'space-between'}>
<MySelect
@@ -221,8 +235,12 @@ const Test = ({ datasetId }: { datasetId: string }) => {
variant={'unstyled'}
maxLength={datasetDetail.vectorModel.maxToken}
placeholder={t('core.dataset.test.Test Text Placeholder')}
onFocus={() => setIsFocus(true)}
{...register('inputText', {
required: true
required: true,
onBlur: () => {
setIsFocus(false);
}
})}
/>
)}
@@ -340,25 +358,26 @@ const TestHistories = React.memo(function TestHistories({
);
return (
<>
<Flex alignItems={'center'} color={'myGray.600'}>
<MyIcon mr={2} name={'history'} w={'16px'} h={'16px'} />
<Box fontSize={'2xl'}>{t('core.dataset.test.test history')}</Box>
<Flex alignItems={'center'} color={'myGray.900'}>
<MyIcon mr={2} name={'history'} w={'18px'} h={'18px'} color={'myGray.900'} />
<Box fontSize={'xl'}>{t('core.dataset.test.test history')}</Box>
</Flex>
<Box mt={2}>
<Flex py={2} fontWeight={'bold'} borderBottom={theme.borders.sm}>
<Box flex={'0 0 80px'}>{t('core.dataset.search.search mode')}</Box>
<Box flex={1}>{t('core.dataset.test.Test Text')}</Box>
<Box flex={'0 0 70px'}>{t('common.Time')}</Box>
<Box w={'14px'}></Box>
</Flex>
{testHistories.map((item) => (
<Flex
key={item.id}
p={1}
py={2}
px={3}
alignItems={'center'}
borderBottom={theme.borders.base}
borderColor={'borderColor.low'}
borderWidth={'1px'}
borderRadius={'md'}
_notLast={{
mb: 2
}}
_hover={{
bg: '#f4f4f4',
borderColor: 'primary.300',
boxShadow: '1',
'& .delete': {
display: 'block'
}
@@ -369,7 +388,7 @@ const TestHistories = React.memo(function TestHistories({
>
<Box flex={'0 0 80px'}>
{DatasetSearchModeMap[item.searchMode] ? (
<Flex alignItems={'center'}>
<Flex alignItems={'center'} fontWeight={'500'} color={'myGray.500'}>
<MyIcon
name={DatasetSearchModeMap[item.searchMode].icon as any}
w={'12px'}
@@ -381,7 +400,7 @@ const TestHistories = React.memo(function TestHistories({
'-'
)}
</Box>
<Box flex={1} mr={2} wordBreak={'break-all'}>
<Box flex={1} mr={2} wordBreak={'break-all'} fontWeight={'400'}>
{item.text}
</Box>
<Box flex={'0 0 70px'}>{formatTimeToChatTime(item.time)}</Box>
@@ -433,13 +452,20 @@ const TestResults = React.memo(function TestResults({
</Flex>
) : (
<>
<Box fontSize={'xl'} color={'myGray.600'}>
<Flex fontSize={'xl'} color={'myGray.900'} alignItems={'center'}>
<MyIcon name={'common/paramsLight'} w={'18px'} mr={2} />
{t('core.dataset.test.Test params')}
</Box>
<TableContainer mb={3} bg={'myGray.150'} borderRadius={'md'}>
</Flex>
<TableContainer
mt={3}
bg={'primary.50'}
borderRadius={'lg'}
borderWidth={'1px'}
borderColor={'primary.1'}
>
<Table>
<Thead>
<Tr>
<Tr color={'myGray.600'}>
<Th>{t('core.dataset.search.search mode')}</Th>
<Th>{t('core.dataset.search.ReRank')}</Th>
<Th>{t('core.dataset.search.Max Tokens')}</Th>
@@ -447,8 +473,8 @@ const TestResults = React.memo(function TestResults({
</Tr>
</Thead>
<Tbody>
<Tr>
<Td>
<Tr color={'myGray.800'}>
<Td pt={0}>
<Flex alignItems={'center'}>
<MyIcon
name={DatasetSearchModeMap[datasetTestItem.searchMode]?.icon as any}
@@ -458,45 +484,31 @@ const TestResults = React.memo(function TestResults({
{t(DatasetSearchModeMap[datasetTestItem.searchMode]?.title)}
</Flex>
</Td>
<Td>{datasetTestItem.usingReRank ? '✅' : '❌'}</Td>
<Td>{datasetTestItem.limit}</Td>
<Td>{datasetTestItem.similarity}</Td>
<Td pt={0}>{datasetTestItem.usingReRank ? '✅' : '❌'}</Td>
<Td pt={0}>{datasetTestItem.limit}</Td>
<Td pt={0}>{datasetTestItem.similarity}</Td>
</Tr>
</Tbody>
</Table>
</TableContainer>
<Flex alignItems={'center'}>
<Box fontSize={'xl'} color={'myGray.600'}>
<Flex mt={5} mb={3} alignItems={'center'}>
<Flex fontSize={'xl'} color={'myGray.900'} alignItems={'center'}>
<MyIcon name={'common/resultLight'} w={'18px'} mr={2} />
{t('core.dataset.test.Test Result')}
</Box>
</Flex>
<MyTooltip label={t('core.dataset.test.test result tip')} forceShow>
<QuestionOutlineIcon mx={2} color={'myGray.600'} cursor={'pointer'} fontSize={'lg'} />
</MyTooltip>
<Box>({datasetTestItem.duration})</Box>
</Flex>
<Grid
mt={1}
gridTemplateColumns={[
'repeat(1,minmax(0, 1fr))',
'repeat(1,minmax(0, 1fr))',
'repeat(1,minmax(0, 1fr))',
'repeat(1,minmax(0, 1fr))',
'repeat(2,minmax(0, 1fr))'
]}
gridGap={4}
>
<Box mt={1} gap={4}>
{datasetTestItem?.results.map((item, index) => (
<Box
key={item.id}
p={2}
borderRadius={'sm'}
border={theme.borders.base}
_notLast={{ mb: 2 }}
>
<Box key={item.id} p={3} borderRadius={'lg'} bg={'myGray.100'} _notLast={{ mb: 2 }}>
<QuoteItem quoteItem={item} canViewSource />
</Box>
))}
</Grid>
</Box>
</>
)}
</>

View File

@@ -20,9 +20,9 @@ import {
delDatasetById,
getDatasetPaths,
putDatasetById,
postCreateDataset,
getCheckExportLimit
postCreateDataset
} from '@/web/core/dataset/api';
import { checkTeamExportDatasetLimit } from '@/web/support/user/api';
import { useTranslation } from 'next-i18next';
import Avatar from '@/components/Avatar';
import MyIcon from '@fastgpt/web/components/common/Icon';
@@ -99,7 +99,7 @@ const Kb = () => {
const { mutate: exportDataset } = useRequest({
mutationFn: async (dataset: DatasetItemType) => {
setLoading(true);
await getCheckExportLimit(dataset._id);
await checkTeamExportDatasetLimit(dataset._id);
const a = document.createElement('a');
a.href = `/api/core/dataset/exportAll?datasetId=${dataset._id}`;
a.download = `${dataset.name}.csv`;

View File

@@ -68,7 +68,7 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
});
return {
[result.key]: result.value,
[result.key]: true,
[ModuleOutputKeyEnum.responseData]: {
price: user.openaiAccount?.key ? 0 : total,
model: modelName,

View File

@@ -272,8 +272,8 @@ function filterQuote({
});
const quoteText =
filterQuoteQA.length > 0
? `${filterQuoteQA.map((item, index) => getValue(item, index)).join('\n')}`
sortQuoteQAList.length > 0
? `${sortQuoteQAList.map((item, index) => getValue(item, index)).join('\n')}`
: '';
return {

View File

@@ -25,7 +25,10 @@ import type {
} from '@/global/core/dataset/api.d';
import type { PushDataResponse } from '@/global/core/api/datasetRes.d';
import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
import {
DatasetCollectionSyncResultEnum,
DatasetTypeEnum
} from '@fastgpt/global/core/dataset/constant';
import type { DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import { PagingData } from '@/types';
@@ -56,9 +59,6 @@ export const postWebsiteSync = (data: PostWebsiteSyncParams) =>
timeout: 600000
}).catch();
export const getCheckExportLimit = (datasetId: string) =>
GET(`/core/dataset/checkExportLimit`, { datasetId });
/* =========== search test ============ */
export const postSearchText = (data: SearchTestProps) =>
POST<SearchTestResponse>(`/core/dataset/searchTest`, data);
@@ -77,7 +77,9 @@ export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
export const delDatasetCollectionById = (params: { collectionId: string }) =>
DELETE(`/core/dataset/collection/delete`, params);
export const postLinkCollectionSync = (collectionId: string) =>
POST(`/core/dataset/collection/sync/link`, { collectionId });
POST<`${DatasetCollectionSyncResultEnum}`>(`/core/dataset/collection/sync/link`, {
collectionId
});
/* =============================== data ==================================== */
/* get dataset list */

View File

@@ -13,6 +13,7 @@ import { defaultDatasetDetail } from '@/constants/dataset';
import type { DatasetUpdateBody } from '@fastgpt/global/core/dataset/api.d';
import { DatasetStatusEnum } from '@fastgpt/global/core/dataset/constant';
import { postCreateTrainingBill } from '@/web/support/wallet/bill/api';
import { checkTeamWebSyncLimit } from '@/web/support/user/api';
type State = {
allDatasets: DatasetListItemType[];
@@ -86,6 +87,8 @@ export const useDatasetStore = create<State>()(
});
},
async startWebsiteSync() {
await checkTeamWebSyncLimit();
const [_, billId] = await Promise.all([
get().updateDataset({
id: get().datasetDetail._id,

View File

@@ -71,3 +71,8 @@ export const postLogin = ({ password, ...props }: PostLoginProps) =>
export const loginOut = () => GET('/support/user/account/loginout');
export const putUserInfo = (data: UserUpdateParams) => PUT('/support/user/account/update', data);
/* team limit */
export const checkTeamExportDatasetLimit = (datasetId: string) =>
GET(`/support/user/team/limit/exportDatasetLimit`, { datasetId });
export const checkTeamWebSyncLimit = () => GET(`/support/user/team/limit/webSyncLimit`);