website sync feature (#4429)

* perf: introduce BullMQ for website sync (#4403)

* perf: introduce BullMQ for website sync

* feat: new redis module

* fix: remove graceful shutdown

* perf: improve UI in dataset detail

- Updated the "change" icon SVG file.
- Modified i18n strings.
- Added new i18n string "immediate_sync".
- Improved UI in dataset detail page, including button icons and
background colors.

* refactor: Add chunkSettings to DatasetSchema

* perf: website sync ux

* env template

* fix: clean up website dataset when updating chunk settings (#4420)

* perf: check setting updated

* perf: worker concurrency

* feat: init script for website sync refactor (#4425)

* website feature doc

---------

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
This commit is contained in:
Archer
2025-04-02 13:51:58 +08:00
committed by archer
parent e54fe1eed6
commit d171b2d3d8
46 changed files with 1607 additions and 680 deletions

View File

@@ -20,6 +20,8 @@ AIPROXY_API_TOKEN=xxxxx
# 强制将图片转成 base64 传递给模型
MULTIPLE_DATA_TO_BASE64=true
# Redis URL
REDIS_URL=redis://default:password@127.0.0.1:6379
# mongo 数据库连接参数,本地开发连接远程数据库时,可能需要增加 directConnection=true 参数,才能连接上。
MONGODB_URI=mongodb://username:password@0.0.0.0:27017/fastgpt?authSource=admin
@@ -65,4 +67,4 @@ CHECK_INTERNAL_IP=false
# # 日志来源ID前缀
# CHAT_LOG_SOURCE_ID_PREFIX=fastgpt-
# 自定义跨域,不配置时,默认都允许跨域(逗号分割)
ALLOWED_ORIGINS=
ALLOWED_ORIGINS=

View File

@@ -1,6 +1,6 @@
import { exit } from 'process';
/*
/*
Init system
*/
export async function register() {

View File

@@ -1,19 +1,18 @@
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { Dispatch, ReactNode, SetStateAction, useEffect, useState } from 'react';
import { Dispatch, ReactNode, SetStateAction, useState } from 'react';
import { useTranslation } from 'next-i18next';
import { createContext, useContextSelector } from 'use-context-selector';
import { DatasetStatusEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { useRequest, useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { useDisclosure } from '@chakra-ui/react';
import { checkTeamWebSyncLimit } from '@/web/support/user/team/api';
import { postCreateTrainingUsage } from '@/web/support/wallet/usage/api';
import { getDatasetCollections, postWebsiteSync } from '@/web/core/dataset/api';
import dynamic from 'next/dynamic';
import { usePagination } from '@fastgpt/web/hooks/usePagination';
import { DatasetCollectionsListItemType } from '@/global/core/dataset/type';
import { useRouter } from 'next/router';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { WebsiteConfigFormType } from './WebsiteConfig';
const WebSiteConfigModal = dynamic(() => import('./WebsiteConfig'));
@@ -66,7 +65,7 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) =>
const router = useRouter();
const { parentId = '' } = router.query as { parentId: string };
const { datasetDetail, datasetId, updateDataset } = useContextSelector(
const { datasetDetail, datasetId, updateDataset, loadDatasetDetail } = useContextSelector(
DatasetPageContext,
(v) => v
);
@@ -75,30 +74,31 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) =>
const { openConfirm: openWebSyncConfirm, ConfirmModal: ConfirmWebSyncModal } = useConfirm({
content: t('dataset:start_sync_website_tip')
});
const syncWebsite = async () => {
await checkTeamWebSyncLimit();
await postWebsiteSync({ datasetId: datasetId });
await loadDatasetDetail(datasetId);
};
const {
isOpen: isOpenWebsiteModal,
onOpen: onOpenWebsiteModal,
onClose: onCloseWebsiteModal
} = useDisclosure();
const { mutate: onUpdateDatasetWebsiteConfig } = useRequest({
mutationFn: async (websiteConfig: DatasetSchemaType['websiteConfig']) => {
onCloseWebsiteModal();
await checkTeamWebSyncLimit();
const { runAsync: onUpdateDatasetWebsiteConfig } = useRequest2(
async (websiteConfig: WebsiteConfigFormType) => {
await updateDataset({
id: datasetId,
websiteConfig,
status: DatasetStatusEnum.syncing
websiteConfig: websiteConfig.websiteConfig,
chunkSettings: websiteConfig.chunkSettings
});
const billId = await postCreateTrainingUsage({
name: t('common:core.dataset.training.Website Sync'),
datasetId: datasetId
});
await postWebsiteSync({ datasetId: datasetId, billId });
return;
await syncWebsite();
},
errorToast: t('common:common.Update Failed')
});
{
onSuccess() {
onCloseWebsiteModal();
}
}
);
// collection list
const [searchText, setSearchText] = useState('');
@@ -124,7 +124,7 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) =>
});
const contextValue: CollectionPageContextType = {
openWebSyncConfirm: openWebSyncConfirm(onUpdateDatasetWebsiteConfig),
openWebSyncConfirm: openWebSyncConfirm(syncWebsite),
onOpenWebsiteModal,
searchText,
@@ -149,10 +149,6 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) =>
<WebSiteConfigModal
onClose={onCloseWebsiteModal}
onSuccess={onUpdateDatasetWebsiteConfig}
defaultValue={{
url: datasetDetail?.websiteConfig?.url,
selector: datasetDetail?.websiteConfig?.selector
}}
/>
)}
<ConfirmWebSyncModal />

View File

@@ -25,6 +25,9 @@ const EmptyCollectionTip = () => {
{datasetDetail.status === DatasetStatusEnum.syncing && (
<>{t('common:core.dataset.status.syncing')}</>
)}
{datasetDetail.status === DatasetStatusEnum.waiting && (
<>{t('common:core.dataset.status.waiting')}</>
)}
{datasetDetail.status === DatasetStatusEnum.active && (
<>
{!datasetDetail?.websiteConfig?.url ? (

View File

@@ -1,35 +1,23 @@
import React from 'react';
import {
Box,
Flex,
MenuButton,
Button,
Link,
useTheme,
useDisclosure,
HStack
} from '@chakra-ui/react';
import { Box, Flex, MenuButton, Button, Link, useDisclosure, HStack } from '@chakra-ui/react';
import {
getDatasetCollectionPathById,
postDatasetCollection,
putDatasetCollectionById
} from '@/web/core/dataset/api';
import { useQuery } from '@tanstack/react-query';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import MyInput from '@/components/MyInput';
import { useRequest, useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { useRouter } from 'next/router';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyMenu from '@fastgpt/web/components/common/MyMenu';
import { useEditTitle } from '@/web/common/hooks/useEditTitle';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum,
DatasetTypeEnum,
DatasetTypeMap,
DatasetStatusEnum,
DatasetCollectionDataProcessModeEnum
DatasetStatusEnum
} from '@fastgpt/global/core/dataset/constants';
import EditFolderModal, { useEditFolder } from '../../EditFolderModal';
import { TabEnum } from '../../../../pages/dataset/detail/index';
@@ -43,26 +31,35 @@ import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContex
import { useSystem } from '@fastgpt/web/hooks/useSystem';
import HeaderTagPopOver from './HeaderTagPopOver';
import MyBox from '@fastgpt/web/components/common/MyBox';
import Icon from '@fastgpt/web/components/common/Icon';
import MyTag from '@fastgpt/web/components/common/Tag/index';
const FileSourceSelector = dynamic(() => import('../Import/components/FileSourceSelector'));
const Header = ({}: {}) => {
const { t } = useTranslation();
const theme = useTheme();
const { feConfigs } = useSystemStore();
const { isPc } = useSystem();
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const router = useRouter();
const { parentId = '' } = router.query as { parentId: string };
const { isPc } = useSystem();
const { searchText, setSearchText, total, getData, pageNum, onOpenWebsiteModal } =
useContextSelector(CollectionPageContext, (v) => v);
const {
searchText,
setSearchText,
total,
getData,
pageNum,
onOpenWebsiteModal,
openWebSyncConfirm
} = useContextSelector(CollectionPageContext, (v) => v);
const { data: paths = [] } = useQuery(['getDatasetCollectionPathById', parentId], () =>
getDatasetCollectionPathById(parentId)
);
const { data: paths = [] } = useRequest2(() => getDatasetCollectionPathById(parentId), {
refreshDeps: [parentId],
manual: false
});
const { editFolderData, setEditFolderData } = useEditFolder();
const { onOpenModal: onOpenCreateVirtualFileModal, EditModal: EditCreateVirtualFileModal } =
@@ -72,13 +69,14 @@ const Header = ({}: {}) => {
canEmpty: false
});
// Import collection
const {
isOpen: isOpenFileSourceSelector,
onOpen: onOpenFileSourceSelector,
onClose: onCloseFileSourceSelector
} = useDisclosure();
const { runAsync: onCreateCollection, loading: onCreating } = useRequest2(
const { runAsync: onCreateCollection } = useRequest2(
async ({ name, type }: { name: string; type: DatasetCollectionTypeEnum }) => {
const id = await postDatasetCollection({
parentId,
@@ -100,7 +98,7 @@ const Header = ({}: {}) => {
const isWebSite = datasetDetail?.type === DatasetTypeEnum.websiteDataset;
return (
<MyBox isLoading={onCreating} display={['block', 'flex']} alignItems={'center'} gap={2}>
<MyBox display={['block', 'flex']} alignItems={'center'} gap={2}>
<HStack flex={1}>
<Box flex={1} fontWeight={'500'} color={'myGray.900'} whiteSpace={'nowrap'}>
<ParentPath
@@ -121,13 +119,15 @@ const Header = ({}: {}) => {
{!isWebSite && <MyIcon name="common/list" mr={2} w={'20px'} color={'black'} />}
{t(DatasetTypeMap[datasetDetail?.type]?.collectionLabel as any)}({total})
</Flex>
{/* Website sync */}
{datasetDetail?.websiteConfig?.url && (
<Flex fontSize={'mini'}>
{t('common:core.dataset.website.Base Url')}:
<Box>{t('common:core.dataset.website.Base Url')}:</Box>
<Link
className="textEllipsis"
maxW={'300px'}
href={datasetDetail.websiteConfig.url}
target="_blank"
mr={2}
color={'blue.700'}
>
{datasetDetail.websiteConfig.url}
@@ -171,12 +171,14 @@ const Header = ({}: {}) => {
)}
{/* Tag */}
{datasetDetail.permission.hasWritePer && feConfigs?.isPlus && <HeaderTagPopOver />}
{datasetDetail.type !== DatasetTypeEnum.websiteDataset &&
datasetDetail.permission.hasWritePer &&
feConfigs?.isPlus && <HeaderTagPopOver />}
</HStack>
{/* diff collection button */}
{datasetDetail.permission.hasWritePer && (
<Box textAlign={'end'} mt={[3, 0]}>
<Box mt={[3, 0]}>
{datasetDetail?.type === DatasetTypeEnum.dataset && (
<MyMenu
offset={[0, 5]}
@@ -233,9 +235,8 @@ const Header = ({}: {}) => {
onClick: () => {
onOpenCreateVirtualFileModal({
defaultVal: '',
onSuccess: (name) => {
onCreateCollection({ name, type: DatasetCollectionTypeEnum.virtual });
}
onSuccess: (name) =>
onCreateCollection({ name, type: DatasetCollectionTypeEnum.virtual })
});
}
},
@@ -272,35 +273,60 @@ const Header = ({}: {}) => {
{datasetDetail?.type === DatasetTypeEnum.websiteDataset && (
<>
{datasetDetail?.websiteConfig?.url ? (
<Flex alignItems={'center'}>
<>
{datasetDetail.status === DatasetStatusEnum.active && (
<Button onClick={onOpenWebsiteModal}>{t('common:common.Config')}</Button>
<HStack gap={2}>
<Button
onClick={onOpenWebsiteModal}
leftIcon={<Icon name="change" w={'1rem'} />}
>
{t('dataset:params_config')}
</Button>
<Button
variant={'whitePrimary'}
onClick={openWebSyncConfirm}
leftIcon={<Icon name="common/confirm/restoreTip" w={'1rem'} />}
>
{t('dataset:immediate_sync')}
</Button>
</HStack>
)}
{datasetDetail.status === DatasetStatusEnum.syncing && (
<Flex
ml={3}
alignItems={'center'}
<MyTag
colorSchema="purple"
showDot
px={3}
py={1}
borderRadius="md"
border={theme.borders.base}
h={'36px'}
DotStyles={{
w: '8px',
h: '8px',
animation: 'zoomStopIcon 0.5s infinite alternate'
}}
>
<Box
animation={'zoomStopIcon 0.5s infinite alternate'}
bg={'myGray.700'}
w="8px"
h="8px"
borderRadius={'50%'}
mt={'1px'}
></Box>
<Box ml={2} color={'myGray.600'}>
{t('common:core.dataset.status.syncing')}
</Box>
</Flex>
{t('common:core.dataset.status.syncing')}
</MyTag>
)}
</Flex>
{datasetDetail.status === DatasetStatusEnum.waiting && (
<MyTag
colorSchema="gray"
showDot
px={3}
h={'36px'}
DotStyles={{
w: '8px',
h: '8px',
animation: 'zoomStopIcon 0.5s infinite alternate'
}}
>
{t('common:core.dataset.status.waiting')}
</MyTag>
)}
</>
) : (
<Button onClick={onOpenWebsiteModal}>
<Button
onClick={onOpenWebsiteModal}
leftIcon={<Icon name="common/setting" w={'18px'} />}
>
{t('common:core.dataset.Set Website Config')}
</Button>
)}

View File

@@ -1,110 +1,215 @@
import React from 'react';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { useTranslation } from 'next-i18next';
import { Box, Button, Input, Link, ModalBody, ModalFooter } from '@chakra-ui/react';
import { strIsLink } from '@fastgpt/global/common/string/tools';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useForm } from 'react-hook-form';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { getDocPath } from '@/web/common/system/doc';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { useMyStep } from '@fastgpt/web/hooks/useStep';
import MyDivider from '@fastgpt/web/components/common/MyDivider';
import React, { useRef } from 'react';
import {
Box,
Link,
Input,
Button,
ModalBody,
ModalFooter,
Textarea,
Stack
} from '@chakra-ui/react';
import {
DataChunkSplitModeEnum,
DatasetCollectionDataProcessModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import CollectionChunkForm, {
collectionChunkForm2StoreChunkData,
type CollectionChunkFormType
} from '../Form/CollectionChunkForm';
import { getLLMDefaultChunkSize } from '@fastgpt/global/core/dataset/training/utils';
import { ChunkSettingsType } from '@fastgpt/global/core/dataset/type';
type FormType = {
url?: string | undefined;
selector?: string | undefined;
export type WebsiteConfigFormType = {
websiteConfig: {
url: string;
selector: string;
};
chunkSettings: ChunkSettingsType;
};
const WebsiteConfigModal = ({
onClose,
onSuccess,
defaultValue = {
url: '',
selector: ''
}
onSuccess
}: {
onClose: () => void;
onSuccess: (data: FormType) => void;
defaultValue?: FormType;
onSuccess: (data: WebsiteConfigFormType) => void;
}) => {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const { toast } = useToast();
const { register, handleSubmit } = useForm({
defaultValues: defaultValue
const steps = [
{
title: t('dataset:website_info')
},
{
title: t('dataset:params_config')
}
];
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const websiteConfig = datasetDetail.websiteConfig;
const chunkSettings = datasetDetail.chunkSettings;
const {
register: websiteInfoForm,
handleSubmit: websiteInfoHandleSubmit,
getValues: websiteInfoGetValues
} = useForm({
defaultValues: {
url: websiteConfig?.url || '',
selector: websiteConfig?.selector || ''
}
});
const isEdit = !!defaultValue.url;
const confirmTip = isEdit
? t('common:core.dataset.website.Confirm Update Tips')
: t('common:core.dataset.website.Confirm Create Tips');
const isEdit = !!websiteConfig?.url;
const { ConfirmModal, openConfirm } = useConfirm({
type: 'common'
});
const { activeStep, goToPrevious, goToNext, MyStep } = useMyStep({
defaultStep: 0,
steps
});
const form = useForm<CollectionChunkFormType>({
defaultValues: {
trainingType: chunkSettings?.trainingType || DatasetCollectionDataProcessModeEnum.chunk,
imageIndex: chunkSettings?.imageIndex || false,
autoIndexes: chunkSettings?.autoIndexes || false,
chunkSettingMode: chunkSettings?.chunkSettingMode || ChunkSettingModeEnum.auto,
chunkSplitMode: chunkSettings?.chunkSplitMode || DataChunkSplitModeEnum.size,
embeddingChunkSize: chunkSettings?.chunkSize || 2000,
qaChunkSize: chunkSettings?.chunkSize || getLLMDefaultChunkSize(datasetDetail.agentModel),
indexSize: chunkSettings?.indexSize || datasetDetail.vectorModel?.defaultToken || 512,
chunkSplitter: chunkSettings?.chunkSplitter || '',
qaPrompt: chunkSettings?.qaPrompt || Prompt_AgentQA.description
}
});
return (
<MyModal
isOpen
iconSrc="core/dataset/websiteDataset"
title={t('common:core.dataset.website.Config')}
onClose={onClose}
maxW={'500px'}
w={'550px'}
>
<ModalBody>
<Box fontSize={'sm'} color={'myGray.600'}>
{t('common:core.dataset.website.Config Description')}
{feConfigs?.docUrl && (
<Link
href={getDocPath('/docs/guide/knowledge_base/websync/')}
target="_blank"
textDecoration={'underline'}
fontWeight={'bold'}
<ModalBody w={'full'}>
<Stack w={'75%'} marginX={'auto'}>
<MyStep />
</Stack>
<MyDivider />
{activeStep == 0 && (
<>
<Box
fontSize={'xs'}
color={'myGray.900'}
bgColor={'blue.50'}
padding={'4'}
borderRadius={'8px'}
>
{t('common:common.course.Read Course')}
</Link>
)}
</Box>
<Box mt={2}>
<Box>{t('common:core.dataset.website.Base Url')}</Box>
<Input
placeholder={t('common:core.dataset.collection.Website Link')}
{...register('url', {
required: true
})}
/>
</Box>
<Box mt={3}>
<Box>
{t('common:core.dataset.website.Selector')}({t('common:common.choosable')})
</Box>
<Input {...register('selector')} placeholder="body .content #document" />
</Box>
{t('common:core.dataset.website.Config Description')}
{feConfigs?.docUrl && (
<Link
href={getDocPath('/docs/guide/knowledge_base/websync/')}
target="_blank"
textDecoration={'underline'}
color={'blue.700'}
>
{t('common:common.course.Read Course')}
</Link>
)}
</Box>
<Box mt={2}>
<Box>{t('common:core.dataset.website.Base Url')}</Box>
<Input
placeholder={t('common:core.dataset.collection.Website Link')}
{...websiteInfoForm('url', {
required: true
})}
/>
</Box>
<Box mt={3}>
<Box>
{t('common:core.dataset.website.Selector')}({t('common:common.choosable')})
</Box>
<Input {...websiteInfoForm('selector')} placeholder="body .content #document" />
</Box>
</>
)}
{activeStep == 1 && <CollectionChunkForm form={form} />}
</ModalBody>
<ModalFooter>
<Button variant={'whiteBase'} onClick={onClose}>
{t('common:common.Close')}
</Button>
<Button
ml={2}
onClick={handleSubmit((data) => {
if (!data.url) return;
// check is link
if (!strIsLink(data.url)) {
return toast({
status: 'warning',
title: t('common:common.link.UnValid')
});
}
openConfirm(
() => {
onSuccess(data);
},
undefined,
confirmTip
)();
})}
>
{t('common:core.dataset.website.Start Sync')}
</Button>
{activeStep == 0 && (
<>
<Button variant={'whiteBase'} onClick={onClose}>
{t('common:common.Close')}
</Button>
<Button
ml={2}
onClick={websiteInfoHandleSubmit((data) => {
if (!data.url) return;
// check is link
if (!strIsLink(data.url)) {
return toast({
status: 'warning',
title: t('common:common.link.UnValid')
});
}
goToNext();
})}
>
{t('common:common.Next Step')}
</Button>
</>
)}
{activeStep == 1 && (
<>
<Button variant={'whiteBase'} onClick={goToPrevious}>
{t('common:common.Last Step')}
</Button>
<Button
ml={2}
onClick={form.handleSubmit((data) => {
openConfirm(
() =>
onSuccess({
websiteConfig: websiteInfoGetValues(),
chunkSettings: collectionChunkForm2StoreChunkData({
...data,
agentModel: datasetDetail.agentModel,
vectorModel: datasetDetail.vectorModel
})
}),
undefined,
isEdit
? t('common:core.dataset.website.Confirm Update Tips')
: t('common:core.dataset.website.Confirm Create Tips')
)();
})}
>
{t('common:core.dataset.website.Start Sync')}
</Button>
</>
)}
</ModalFooter>
<ConfirmModal />
</MyModal>
@@ -112,3 +217,42 @@ const WebsiteConfigModal = ({
};
export default WebsiteConfigModal;
/**
 * Modal used to edit the custom QA prompt.
 *
 * Renders an uncontrolled textarea seeded with `defaultValue`, followed by the
 * fixed (non-editable) part of the QA prompt. On confirm, the textarea content
 * is passed to `onChange` (an empty value falls back to
 * `Prompt_AgentQA.description`) and the modal is closed via `onClose`.
 *
 * NOTE(review): nothing in the visible part of this file renders this
 * component, and the chunk-form module imported at the top appears to define a
 * component with the same name and body — confirm whether this copy is needed.
 */
const PromptTextarea = ({
  defaultValue,
  onChange,
  onClose
}: {
  defaultValue: string;
  onChange: (e: string) => void;
  onClose: () => void;
}) => {
  const textareaRef = useRef<HTMLTextAreaElement>(null);
  const { t } = useTranslation();

  // The textarea is uncontrolled, so its value is only read at confirm time.
  const handleConfirm = () => {
    const prompt = textareaRef.current?.value || Prompt_AgentQA.description;
    onChange(prompt);
    onClose();
  };

  return (
    <MyModal
      isOpen
      title={t('common:core.dataset.import.Custom prompt')}
      iconSrc="modal/edit"
      w={'600px'}
      onClose={onClose}
    >
      <ModalBody whiteSpace={'pre-wrap'} fontSize={'sm'} px={[3, 6]} pt={[3, 6]}>
        <Textarea ref={textareaRef} rows={8} fontSize={'sm'} defaultValue={defaultValue} />
        <Box>{Prompt_AgentQA.fixedText}</Box>
      </ModalBody>
      <ModalFooter>
        <Button onClick={handleConfirm}>{t('common:common.Confirm')}</Button>
      </ModalFooter>
    </MyModal>
  );
};

View File

@@ -64,16 +64,6 @@ const CollectionCard = () => {
const { datasetDetail, loadDatasetDetail } = useContextSelector(DatasetPageContext, (v) => v);
const { feConfigs } = useSystemStore();
const { openConfirm: openDeleteConfirm, ConfirmModal: ConfirmDeleteModal } = useConfirm({
content: t('common:dataset.Confirm to delete the file'),
type: 'delete'
});
const { onOpenModal: onOpenEditTitleModal, EditModal: EditTitleModal } = useEditTitle({
title: t('common:Rename')
});
const [moveCollectionData, setMoveCollectionData] = useState<{ collectionId: string }>();
const [trainingStatesCollection, setTrainingStatesCollection] = useState<{
collectionId: string;
}>();
@@ -116,6 +106,11 @@ const CollectionCard = () => {
[collections, t]
);
const [moveCollectionData, setMoveCollectionData] = useState<{ collectionId: string }>();
const { onOpenModal: onOpenEditTitleModal, EditModal: EditTitleModal } = useEditTitle({
title: t('common:Rename')
});
const { runAsync: onUpdateCollection, loading: isUpdating } = useRequest2(
putDatasetCollectionById,
{
@@ -125,7 +120,12 @@ const CollectionCard = () => {
successToast: t('common:common.Update Success')
}
);
const { runAsync: onDelCollection, loading: isDeleting } = useRequest2(
const { openConfirm: openDeleteConfirm, ConfirmModal: ConfirmDeleteModal } = useConfirm({
content: t('common:dataset.Confirm to delete the file'),
type: 'delete'
});
const { runAsync: onDelCollection } = useRequest2(
(collectionId: string) => {
return delDatasetCollectionById({
id: collectionId
@@ -163,14 +163,14 @@ const CollectionCard = () => {
['refreshCollection'],
() => {
getData(pageNum);
if (datasetDetail.status === DatasetStatusEnum.syncing) {
if (datasetDetail.status !== DatasetStatusEnum.active) {
loadDatasetDetail(datasetDetail._id);
}
return null;
},
{
refetchInterval: 6000,
enabled: hasTrainingData || datasetDetail.status === DatasetStatusEnum.syncing
enabled: hasTrainingData || datasetDetail.status !== DatasetStatusEnum.active
}
);
@@ -190,7 +190,7 @@ const CollectionCard = () => {
});
const isLoading =
isUpdating || isDeleting || isSyncing || (isGetting && collections.length === 0) || isDropping;
isUpdating || isSyncing || (isGetting && collections.length === 0) || isDropping;
return (
<MyBox isLoading={isLoading} h={'100%'} py={[2, 4]}>
@@ -406,9 +406,7 @@ const CollectionCard = () => {
type: 'danger',
onClick: () =>
openDeleteConfirm(
() => {
onDelCollection(collection._id);
},
() => onDelCollection(collection._id),
undefined,
collection.type === DatasetCollectionTypeEnum.folder
? t('common:dataset.collections.Confirm to delete the folder')

View File

@@ -0,0 +1,524 @@
import MyModal from '@fastgpt/web/components/common/MyModal';
import { useTranslation } from 'next-i18next';
import { UseFormReturn } from 'react-hook-form';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import React, { useEffect, useMemo, useRef, useState } from 'react';
import {
Box,
Flex,
Input,
Button,
ModalBody,
ModalFooter,
Textarea,
useDisclosure,
Checkbox,
HStack
} from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
import {
DataChunkSplitModeEnum,
DatasetCollectionDataProcessModeEnum,
DatasetCollectionDataProcessModeMap
} from '@fastgpt/global/core/dataset/constants';
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import { useContextSelector } from 'use-context-selector';
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
import MyNumberInput from '@fastgpt/web/components/common/Input/NumberInput';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import MySelect from '@fastgpt/web/components/common/MySelect';
import {
chunkAutoChunkSize,
getAutoIndexSize,
getIndexSizeSelectList,
getLLMDefaultChunkSize,
getLLMMaxChunkSize,
getMaxChunkSize,
getMaxIndexSize,
minChunkSize
} from '@fastgpt/global/core/dataset/training/utils';
import RadioGroup from '@fastgpt/web/components/common/Radio/RadioGroup';
import { ChunkSettingsType } from '@fastgpt/global/core/dataset/type';
import type { LLMModelItemType, EmbeddingModelItemType } from '@fastgpt/global/core/ai/model.d';
/**
 * Modal used to edit the custom QA prompt.
 *
 * Renders an uncontrolled textarea seeded with `defaultValue` (empty string
 * when omitted), followed by the fixed part of the QA prompt. On confirm, the
 * textarea content is passed to `onChange` (an empty value falls back to
 * `Prompt_AgentQA.description`) and the modal is closed via `onClose`.
 */
const PromptTextarea = ({
  defaultValue = '',
  onChange,
  onClose
}: {
  defaultValue?: string;
  onChange: (e: string) => void;
  onClose: () => void;
}) => {
  const textareaRef = useRef<HTMLTextAreaElement>(null);
  const { t } = useTranslation();

  // The textarea is uncontrolled, so its value is only read at confirm time.
  const handleConfirm = () => {
    const prompt = textareaRef.current?.value || Prompt_AgentQA.description;
    onChange(prompt);
    onClose();
  };

  return (
    <MyModal
      isOpen
      title={t('common:core.dataset.import.Custom prompt')}
      iconSrc="modal/edit"
      w={'600px'}
      onClose={onClose}
    >
      <ModalBody whiteSpace={'pre-wrap'} fontSize={'sm'} px={[3, 6]} pt={[3, 6]}>
        <Textarea ref={textareaRef} rows={8} fontSize={'sm'} defaultValue={defaultValue} />
        <Box>{Prompt_AgentQA.fixedText}</Box>
      </ModalBody>
      <ModalFooter>
        <Button onClick={handleConfirm}>{t('common:common.Confirm')}</Button>
      </ModalFooter>
    </MyModal>
  );
};
/**
 * Values held by the react-hook-form instance that CollectionChunkForm edits.
 * collectionChunkForm2StoreChunkData converts these into ChunkSettingsType.
 */
export type CollectionChunkFormType = {
// Data processing mode chosen in the training-mode radio
trainingType: DatasetCollectionDataProcessModeEnum;
// Bound to the "image auto parse" checkbox
imageIndex: boolean;
// Bound to the "auto indexes" checkbox
autoIndexes: boolean;
// Whether sizes come from defaults (auto) or from the custom fields below
chunkSettingMode: ChunkSettingModeEnum;
// Custom mode only: split by size or by a splitter string
chunkSplitMode: DataChunkSplitModeEnum;
// Chunk size used in custom mode when trainingType is not qa
embeddingChunkSize: number;
// Chunk size used in custom mode when trainingType is qa
qaChunkSize: number;
// Splitter string; written by the split-sign select or the free-text input
chunkSplitter?: string;
// Index size used in custom mode when trainingType is not qa
indexSize: number;
// Custom QA prompt; only stored when trainingType is qa
qaPrompt?: string;
};
/**
 * Form section for the chunk / training settings of a dataset collection.
 *
 * The react-hook-form instance is owned by the caller and passed in as `form`;
 * this component only reads and writes its fields. The dataset's vector and
 * agent models are read from DatasetPageContext to derive size limits.
 */
const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkFormType> }) => {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const vectorModel = datasetDetail.vectorModel;
const agentModel = datasetDetail.agentModel;
const { setValue, register, watch, getValues } = form;
// Watched fields drive the conditional rendering further down
const trainingType = watch('trainingType');
const chunkSettingMode = watch('chunkSettingMode');
const chunkSplitMode = watch('chunkSplitMode');
const autoIndexes = watch('autoIndexes');
const indexSize = watch('indexSize');
// Radio options built from DatasetCollectionDataProcessModeMap, leaving out the 'auto' mode
const trainingModeList = useMemo(() => {
const list = Object.entries(DatasetCollectionDataProcessModeMap);
return list
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
.map(([key, value]) => ({
title: t(value.label as any),
value: key as DatasetCollectionDataProcessModeEnum,
tooltip: t(value.tooltip as any)
}));
}, [t]);
// Which form field holds the chunk size, and the allowed ranges, depend on the training type.
// NOTE(review): the `autoIndexes` branch and the final `else` branch return identical values.
const {
chunkSizeField,
maxChunkSize,
minChunkSize: minChunkSizeValue,
maxIndexSize
} = useMemo(() => {
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
return {
chunkSizeField: 'qaChunkSize',
maxChunkSize: getLLMMaxChunkSize(agentModel),
minChunkSize: 1000,
maxIndexSize: 1000
};
} else if (autoIndexes) {
return {
chunkSizeField: 'embeddingChunkSize',
maxChunkSize: getMaxChunkSize(agentModel),
minChunkSize: minChunkSize,
maxIndexSize: getMaxIndexSize(vectorModel)
};
} else {
return {
chunkSizeField: 'embeddingChunkSize',
maxChunkSize: getMaxChunkSize(agentModel),
minChunkSize: minChunkSize,
maxIndexSize: getMaxIndexSize(vectorModel)
};
}
}, [trainingType, autoIndexes, agentModel, vectorModel]);
// Custom split list
const customSplitList = [
{ label: t('dataset:split_sign_null'), value: '' },
{ label: t('dataset:split_sign_break'), value: '\\n' },
{ label: t('dataset:split_sign_break2'), value: '\\n\\n' },
{ label: t('dataset:split_sign_period'), value: '.|。' },
{ label: t('dataset:split_sign_exclamatiob'), value: '!|' },
{ label: t('dataset:split_sign_question'), value: '?|' },
{ label: t('dataset:split_sign_semicolon'), value: ';|' },
{ label: '=====', value: '=====' },
{ label: t('dataset:split_sign_custom'), value: 'Other' }
];
// Local state for the split-sign select, seeded from the form's current chunkSplitter.
// NOTE(review): a stored splitter that is not one of the list values above will not match
// any option — confirm how MySelect renders that case.
const [customListSelectValue, setCustomListSelectValue] = useState(getValues('chunkSplitter'));
// Mirror the select into the form. Choosing 'Other' clears chunkSplitter so that the
// free-text input (registered on the same field) starts empty.
useEffect(() => {
if (customListSelectValue === 'Other') {
setValue('chunkSplitter', '');
} else {
setValue('chunkSplitter', customListSelectValue);
}
}, [customListSelectValue, setValue]);
// Index size
const indexSizeSeletorList = useMemo(() => getIndexSizeSelectList(maxIndexSize), [maxIndexSize]);
// QA
const qaPrompt = watch('qaPrompt');
const {
isOpen: isOpenCustomPrompt,
onOpen: onOpenCustomPrompt,
onClose: onCloseCustomPrompt
} = useDisclosure();
const showQAPromptInput = trainingType === DatasetCollectionDataProcessModeEnum.qa;
// Adapt 4.9.0- auto training: a form value of 'auto' is converted into
// chunk mode with autoIndexes turned on
useEffect(() => {
if (trainingType === DatasetCollectionDataProcessModeEnum.auto) {
setValue('autoIndexes', true);
setValue('trainingType', DatasetCollectionDataProcessModeEnum.chunk);
}
}, [trainingType, setValue]);
return (
<>
<Box>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:training_mode')}
</Box>
<LeftRadio<DatasetCollectionDataProcessModeEnum>
list={trainingModeList}
px={3}
py={2.5}
value={trainingType}
onChange={(e) => {
setValue('trainingType', e);
}}
defaultBg="white"
activeBg="white"
gridTemplateColumns={'repeat(2, 1fr)'}
/>
</Box>
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:enhanced_indexes')}
</Box>
<HStack gap={[3, 7]}>
<HStack flex={'1'} spacing={1}>
<MyTooltip label={!feConfigs?.isPlus ? t('common:commercial_function_tip') : ''}>
<Checkbox isDisabled={!feConfigs?.isPlus} {...register('autoIndexes')}>
<FormLabel>{t('dataset:auto_indexes')}</FormLabel>
</Checkbox>
</MyTooltip>
<QuestionTip label={t('dataset:auto_indexes_tips')} />
</HStack>
<HStack flex={'1'} spacing={1}>
<MyTooltip
label={
!feConfigs?.isPlus
? t('common:commercial_function_tip')
: !datasetDetail?.vlmModel
? t('common:error_vlm_not_config')
: ''
}
>
<Checkbox
isDisabled={!feConfigs?.isPlus || !datasetDetail?.vlmModel}
{...register('imageIndex')}
>
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
</Checkbox>
</MyTooltip>
<QuestionTip label={t('dataset:image_auto_parse_tips')} />
</HStack>
</HStack>
</Box>
)}
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:params_setting')}
</Box>
<LeftRadio<ChunkSettingModeEnum>
list={[
{
title: t('dataset:default_params'),
desc: t('dataset:default_params_desc'),
value: ChunkSettingModeEnum.auto
},
{
title: t('dataset:custom_data_process_params'),
desc: t('dataset:custom_data_process_params_desc'),
value: ChunkSettingModeEnum.custom,
children: chunkSettingMode === ChunkSettingModeEnum.custom && (
<Box mt={5}>
<Box>
<RadioGroup<DataChunkSplitModeEnum>
list={[
{
title: t('dataset:split_chunk_size'),
value: DataChunkSplitModeEnum.size
},
{
title: t('dataset:split_chunk_char'),
value: DataChunkSplitModeEnum.char,
tooltip: t('dataset:custom_split_sign_tip')
}
]}
value={chunkSplitMode}
onChange={(e) => {
setValue('chunkSplitMode', e);
}}
/>
{chunkSplitMode === DataChunkSplitModeEnum.size && (
<Box
mt={1.5}
css={{
'& > span': {
display: 'block'
}
}}
>
<MyTooltip
label={t('common:core.dataset.import.Chunk Range', {
min: minChunkSizeValue,
max: maxChunkSize
})}
>
<MyNumberInput
register={register}
name={chunkSizeField}
min={minChunkSizeValue}
max={maxChunkSize}
size={'sm'}
step={100}
/>
</MyTooltip>
</Box>
)}
{chunkSplitMode === DataChunkSplitModeEnum.char && (
<HStack mt={1.5}>
<Box flex={'1 0 0'}>
<MySelect<string>
list={customSplitList}
size={'sm'}
bg={'myGray.50'}
value={customListSelectValue}
h={'32px'}
onChange={(val) => {
setCustomListSelectValue(val);
}}
/>
</Box>
{customListSelectValue === 'Other' && (
<Input
flex={'1 0 0'}
h={'32px'}
size={'sm'}
bg={'myGray.50'}
placeholder="\n;======;==SPLIT=="
{...register('chunkSplitter')}
/>
)}
</HStack>
)}
</Box>
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
<Box>
<Flex alignItems={'center'} mt={3}>
<Box>{t('dataset:index_size')}</Box>
<QuestionTip label={t('dataset:index_size_tips')} />
</Flex>
<Box mt={1}>
<MySelect<number>
bg={'myGray.50'}
list={indexSizeSeletorList}
value={indexSize}
onChange={(val) => {
setValue('indexSize', val);
}}
/>
</Box>
</Box>
)}
{showQAPromptInput && (
<Box mt={3}>
<Box>{t('common:core.dataset.collection.QA Prompt')}</Box>
<Box
position={'relative'}
py={2}
px={3}
bg={'myGray.50'}
fontSize={'xs'}
whiteSpace={'pre-wrap'}
border={'1px'}
borderColor={'borderColor.base'}
borderRadius={'md'}
maxH={'140px'}
overflow={'auto'}
_hover={{
'& .mask': {
display: 'block'
}
}}
>
{qaPrompt}
<Box
display={'none'}
className="mask"
position={'absolute'}
top={0}
right={0}
bottom={0}
left={0}
background={
'linear-gradient(182deg, rgba(255, 255, 255, 0.00) 1.76%, #FFF 84.07%)'
}
>
<Button
size="xs"
variant={'whiteBase'}
leftIcon={<MyIcon name={'edit'} w={'13px'} />}
color={'black'}
position={'absolute'}
right={2}
bottom={2}
onClick={onOpenCustomPrompt}
>
{t('common:core.dataset.import.Custom prompt')}
</Button>
</Box>
</Box>
</Box>
)}
</Box>
)
}
]}
gridGap={3}
px={3}
py={3}
defaultBg="white"
activeBg="white"
value={chunkSettingMode}
w={'100%'}
onChange={(e) => {
setValue('chunkSettingMode', e);
}}
/>
</Box>
{isOpenCustomPrompt && (
<PromptTextarea
defaultValue={qaPrompt}
onChange={(e) => {
setValue('qaPrompt', e);
}}
onClose={onCloseCustomPrompt}
/>
)}
</>
);
};
export default CollectionChunkForm;
/**
 * Convert the values of the collection chunk form into the chunk settings
 * object that is stored.
 *
 * The form keeps separate chunk sizes for QA and embedding training
 * (`qaChunkSize` / `embeddingChunkSize`); this collapses them into a single
 * `chunkSize` / `indexSize` pair, chosen from the training type and the
 * chunk setting mode (auto vs. custom).
 *
 * @param form - The chunk form values plus the two models used to derive the
 *   automatic sizes: `agentModel` for the QA auto chunk size, `vectorModel`
 *   for the auto index size.
 * @returns The chunk settings to persist. `qaPrompt` is only kept when the
 *   training type is QA; otherwise it is `undefined`.
 */
export const collectionChunkForm2StoreChunkData = ({
  trainingType,
  imageIndex,
  autoIndexes,
  chunkSettingMode,
  chunkSplitMode,
  embeddingChunkSize,
  qaChunkSize,
  chunkSplitter,
  indexSize,
  qaPrompt,
  agentModel,
  vectorModel
}: CollectionChunkFormType & {
  agentModel: LLMModelItemType;
  vectorModel: EmbeddingModelItemType;
}): ChunkSettingsType => {
  const isQATraining = trainingType === DatasetCollectionDataProcessModeEnum.qa;

  // Candidate sizes for the current training type: the "auto" pair and the
  // user-entered pair. QA training uses a fixed index size of 512; every other
  // training type shares the same values (the previous separate `autoIndexes`
  // branch returned exactly the same object, so it has been merged).
  const trainingModeSize: {
    autoChunkSize: number;
    autoIndexSize: number;
    chunkSize: number;
    indexSize: number;
  } = isQATraining
    ? {
        autoChunkSize: getLLMDefaultChunkSize(agentModel),
        autoIndexSize: 512,
        chunkSize: qaChunkSize,
        indexSize: 512
      }
    : {
        autoChunkSize: chunkAutoChunkSize,
        autoIndexSize: getAutoIndexSize(vectorModel),
        chunkSize: embeddingChunkSize,
        indexSize
      };

  // In auto mode the derived sizes win; in custom mode the form values win.
  const useAutoSizes = chunkSettingMode === ChunkSettingModeEnum.auto;
  const resolvedChunkSize = useAutoSizes
    ? trainingModeSize.autoChunkSize
    : trainingModeSize.chunkSize;
  const resolvedIndexSize = useAutoSizes
    ? trainingModeSize.autoIndexSize
    : trainingModeSize.indexSize;

  return {
    trainingType,
    imageIndex,
    autoIndexes,
    chunkSettingMode,
    chunkSplitMode,
    chunkSize: resolvedChunkSize,
    indexSize: resolvedIndexSize,
    chunkSplitter,
    qaPrompt: isQATraining ? qaPrompt : undefined
  };
};

View File

@@ -25,6 +25,14 @@ import {
getAutoIndexSize,
getMaxIndexSize
} from '@fastgpt/global/core/dataset/training/utils';
import { CollectionChunkFormType } from '../Form/CollectionChunkForm';
// Names of the two form fields that hold a chunk size
// (judging by the names: one for embedding chunks, one for QA chunks).
type ChunkSizeFieldType = 'embeddingChunkSize' | 'qaChunkSize';
/**
 * Values of the import form: every field of CollectionChunkFormType plus the
 * two import-specific fields declared here.
 */
export type ImportFormType = {
// NOTE(review): presumably toggles a custom PDF parser — confirm against the consumer.
customPdfParse: boolean;
// NOTE(review): presumably a selector applied to fetched web pages — confirm.
webSelector: string;
} & CollectionChunkFormType;
type TrainingFiledType = {
chunkOverlapRatio: number;
@@ -51,26 +59,6 @@ type DatasetImportContextType = {
setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
} & TrainingFiledType;
// Names of the two form fields that hold a chunk size
// (both are declared as numbers in ImportFormType below).
type ChunkSizeFieldType = 'embeddingChunkSize' | 'qaChunkSize';
/**
 * Values of the import form, with every chunk-related field listed inline.
 */
export type ImportFormType = {
// NOTE(review): presumably toggles a custom PDF parser — confirm against the consumer.
customPdfParse: boolean;
// Data processing mode for the imported collection.
trainingType: DatasetCollectionDataProcessModeEnum;
imageIndex: boolean;
autoIndexes: boolean;
// Whether chunk parameters are chosen automatically or entered by the user.
chunkSettingMode: ChunkSettingModeEnum;
// How text is split into chunks (the enum's members are defined elsewhere).
chunkSplitMode: DataChunkSplitModeEnum;
// Separate chunk sizes are kept per training mode; see ChunkSizeFieldType.
embeddingChunkSize: number;
qaChunkSize: number;
chunkSplitter: string;
indexSize: number;
qaPrompt: string;
// NOTE(review): presumably a selector applied to fetched web pages — confirm.
webSelector: string;
};
export const DatasetImportContext = createContext<DatasetImportContextType>({
importSource: ImportDataSourceEnum.fileLocal,
goToNext: function (): void {
@@ -314,14 +302,7 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
chunkSplitter
};
}
}, [
chunkSettingMode,
TrainingModeMap.autoChunkSize,
TrainingModeMap.autoIndexSize,
TrainingModeMap.chunkSize,
TrainingModeMap.indexSize,
chunkSplitter
]);
}, [chunkSettingMode, TrainingModeMap, chunkSplitter]);
const contextValue = {
...TrainingModeMap,

View File

@@ -1,13 +1,8 @@
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import React, { useCallback } from 'react';
import {
Box,
Flex,
Input,
Button,
ModalBody,
ModalFooter,
Textarea,
useDisclosure,
Checkbox,
Accordion,
AccordionItem,
@@ -16,93 +11,26 @@ import {
AccordionIcon,
HStack
} from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
import {
DataChunkSplitModeEnum,
DatasetCollectionDataProcessModeEnum,
DatasetCollectionDataProcessModeMap
} from '@fastgpt/global/core/dataset/constants';
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import MyTag from '@fastgpt/web/components/common/Tag/index';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
import MyNumberInput from '@fastgpt/web/components/common/Input/NumberInput';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import { shadowLight } from '@fastgpt/web/styles/theme';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import MySelect from '@fastgpt/web/components/common/MySelect';
import { getIndexSizeSelectList } from '@fastgpt/global/core/dataset/training/utils';
import RadioGroup from '@fastgpt/web/components/common/Radio/RadioGroup';
import CollectionChunkForm from '../../Form/CollectionChunkForm';
import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
function DataProcess() {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const {
goToNext,
processParamsForm,
chunkSizeField,
minChunkSize,
maxChunkSize,
maxIndexSize,
indexSize
} = useContextSelector(DatasetImportContext, (v) => v);
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const { setValue, register, watch, getValues } = processParamsForm;
const trainingType = watch('trainingType');
const trainingModeList = useMemo(() => {
const list = Object.entries(DatasetCollectionDataProcessModeMap);
return list
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
.map(([key, value]) => ({
title: t(value.label as any),
value: key as DatasetCollectionDataProcessModeEnum,
tooltip: t(value.tooltip as any)
}));
}, [t]);
const chunkSettingMode = watch('chunkSettingMode');
const chunkSplitMode = watch('chunkSplitMode');
const customSplitList = [
{ label: t('dataset:split_sign_null'), value: '' },
{ label: t('dataset:split_sign_break'), value: '\\n' },
{ label: t('dataset:split_sign_break2'), value: '\\n\\n' },
{ label: t('dataset:split_sign_period'), value: '.|。' },
{ label: t('dataset:split_sign_exclamatiob'), value: '!|' },
{ label: t('dataset:split_sign_question'), value: '?|' },
{ label: t('dataset:split_sign_semicolon'), value: ';|' },
{ label: '=====', value: '=====' },
{ label: t('dataset:split_sign_custom'), value: 'Other' }
];
const [customListSelectValue, setCustomListSelectValue] = useState(getValues('chunkSplitter'));
useEffect(() => {
if (customListSelectValue === 'Other') {
setValue('chunkSplitter', '');
} else {
setValue('chunkSplitter', customListSelectValue);
}
}, [customListSelectValue, setValue]);
// Index size
const indexSizeSeletorList = useMemo(() => getIndexSizeSelectList(maxIndexSize), [maxIndexSize]);
// QA
const qaPrompt = watch('qaPrompt');
const {
isOpen: isOpenCustomPrompt,
onOpen: onOpenCustomPrompt,
onClose: onCloseCustomPrompt
} = useDisclosure();
const { goToNext, processParamsForm, chunkSize } = useContextSelector(
DatasetImportContext,
(v) => v
);
const { register } = processParamsForm;
const Title = useCallback(({ title }: { title: string }) => {
return (
@@ -116,16 +44,7 @@ function DataProcess() {
);
}, []);
// Adapt auto training
useEffect(() => {
if (trainingType === DatasetCollectionDataProcessModeEnum.auto) {
setValue('autoIndexes', true);
setValue('trainingType', DatasetCollectionDataProcessModeEnum.chunk);
}
}, [trainingType, setValue]);
const showFileParseSetting = feConfigs?.showCustomPdfParse;
const showQAPromptInput = trainingType === DatasetCollectionDataProcessModeEnum.qa;
return (
<>
@@ -179,238 +98,8 @@ function DataProcess() {
<Title title={t('dataset:import_data_process_setting')} />
<AccordionPanel p={2}>
<Box mt={2}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:training_mode')}
</Box>
<LeftRadio<DatasetCollectionDataProcessModeEnum>
list={trainingModeList}
px={3}
py={2.5}
value={trainingType}
onChange={(e) => {
setValue('trainingType', e);
}}
defaultBg="white"
activeBg="white"
gridTemplateColumns={'repeat(2, 1fr)'}
/>
</Box>
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:enhanced_indexes')}
</Box>
<HStack gap={[3, 7]}>
<HStack flex={'1'} spacing={1}>
<MyTooltip
label={!feConfigs?.isPlus ? t('common:commercial_function_tip') : ''}
>
<Checkbox isDisabled={!feConfigs?.isPlus} {...register('autoIndexes')}>
<FormLabel>{t('dataset:auto_indexes')}</FormLabel>
</Checkbox>
</MyTooltip>
<QuestionTip label={t('dataset:auto_indexes_tips')} />
</HStack>
<HStack flex={'1'} spacing={1}>
<MyTooltip
label={
!feConfigs?.isPlus
? t('common:commercial_function_tip')
: !datasetDetail?.vlmModel
? t('common:error_vlm_not_config')
: ''
}
>
<Checkbox
isDisabled={!feConfigs?.isPlus || !datasetDetail?.vlmModel}
{...register('imageIndex')}
>
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
</Checkbox>
</MyTooltip>
<QuestionTip label={t('dataset:image_auto_parse_tips')} />
</HStack>
</HStack>
</Box>
)}
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:params_setting')}
</Box>
<LeftRadio<ChunkSettingModeEnum>
list={[
{
title: t('dataset:default_params'),
desc: t('dataset:default_params_desc'),
value: ChunkSettingModeEnum.auto
},
{
title: t('dataset:custom_data_process_params'),
desc: t('dataset:custom_data_process_params_desc'),
value: ChunkSettingModeEnum.custom,
children: chunkSettingMode === ChunkSettingModeEnum.custom && (
<Box mt={5}>
<Box>
<RadioGroup<DataChunkSplitModeEnum>
list={[
{
title: t('dataset:split_chunk_size'),
value: DataChunkSplitModeEnum.size
},
{
title: t('dataset:split_chunk_char'),
value: DataChunkSplitModeEnum.char,
tooltip: t('dataset:custom_split_sign_tip')
}
]}
value={chunkSplitMode}
onChange={(e) => {
setValue('chunkSplitMode', e);
}}
/>
{chunkSplitMode === DataChunkSplitModeEnum.size && (
<Box
mt={1.5}
css={{
'& > span': {
display: 'block'
}
}}
>
<MyTooltip
label={t('common:core.dataset.import.Chunk Range', {
min: minChunkSize,
max: maxChunkSize
})}
>
<MyNumberInput
register={register}
name={chunkSizeField}
min={minChunkSize}
max={maxChunkSize}
size={'sm'}
step={100}
/>
</MyTooltip>
</Box>
)}
{chunkSplitMode === DataChunkSplitModeEnum.char && (
<HStack mt={1.5}>
<Box flex={'1 0 0'}>
<MySelect<string>
list={customSplitList}
size={'sm'}
bg={'myGray.50'}
value={customListSelectValue}
h={'32px'}
onChange={(val) => {
setCustomListSelectValue(val);
}}
/>
</Box>
{customListSelectValue === 'Other' && (
<Input
flex={'1 0 0'}
h={'32px'}
size={'sm'}
bg={'myGray.50'}
placeholder="\n;======;==SPLIT=="
{...register('chunkSplitter')}
/>
)}
</HStack>
)}
</Box>
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
<Box>
<Flex alignItems={'center'} mt={3}>
<Box>{t('dataset:index_size')}</Box>
<QuestionTip label={t('dataset:index_size_tips')} />
</Flex>
<Box mt={1}>
<MySelect<number>
bg={'myGray.50'}
list={indexSizeSeletorList}
value={indexSize}
onChange={(val) => {
setValue('indexSize', val);
}}
/>
</Box>
</Box>
)}
{showQAPromptInput && (
<Box mt={3}>
<Box>{t('common:core.dataset.collection.QA Prompt')}</Box>
<Box
position={'relative'}
py={2}
px={3}
bg={'myGray.50'}
fontSize={'xs'}
whiteSpace={'pre-wrap'}
border={'1px'}
borderColor={'borderColor.base'}
borderRadius={'md'}
maxH={'140px'}
overflow={'auto'}
_hover={{
'& .mask': {
display: 'block'
}
}}
>
{qaPrompt}
<Box
display={'none'}
className="mask"
position={'absolute'}
top={0}
right={0}
bottom={0}
left={0}
background={
'linear-gradient(182deg, rgba(255, 255, 255, 0.00) 1.76%, #FFF 84.07%)'
}
>
<Button
size="xs"
variant={'whiteBase'}
leftIcon={<MyIcon name={'edit'} w={'13px'} />}
color={'black'}
position={'absolute'}
right={2}
bottom={2}
onClick={onOpenCustomPrompt}
>
{t('common:core.dataset.import.Custom prompt')}
</Button>
</Box>
</Box>
</Box>
)}
</Box>
)
}
]}
gridGap={3}
px={3}
py={3}
defaultBg="white"
activeBg="white"
value={chunkSettingMode}
w={'100%'}
onChange={(e) => {
setValue('chunkSettingMode', e);
}}
/>
</Box>
{/* @ts-ignore */}
<CollectionChunkForm form={processParamsForm} />
</AccordionPanel>
</AccordionItem>
@@ -425,57 +114,8 @@ function DataProcess() {
</Flex>
</Accordion>
</Box>
{isOpenCustomPrompt && (
<PromptTextarea
defaultValue={qaPrompt}
onChange={(e) => {
setValue('qaPrompt', e);
}}
onClose={onCloseCustomPrompt}
/>
)}
</>
);
}
export default React.memo(DataProcess);
/**
 * Modal with an uncontrolled textarea for editing the custom QA prompt.
 *
 * The textarea is seeded with `defaultValue`. On confirm, its current text is
 * passed to `onChange` (falling back to `Prompt_AgentQA.description` when the
 * text is empty) and the modal is closed through `onClose`.
 */
const PromptTextarea = ({
  defaultValue,
  onChange,
  onClose
}: {
  defaultValue: string;
  onChange: (e: string) => void;
  onClose: () => void;
}) => {
  const textareaRef = useRef<HTMLTextAreaElement>(null);
  const { t } = useTranslation();

  // Read the value straight from the DOM node: the textarea is uncontrolled.
  const handleConfirm = () => {
    const currentText = textareaRef.current?.value;
    onChange(currentText || Prompt_AgentQA.description);
    onClose();
  };

  return (
    <MyModal
      isOpen
      title={t('common:core.dataset.import.Custom prompt')}
      iconSrc="modal/edit"
      w={'600px'}
      onClose={onClose}
    >
      <ModalBody whiteSpace={'pre-wrap'} fontSize={'sm'} px={[3, 6]} pt={[3, 6]}>
        <Textarea ref={textareaRef} rows={8} fontSize={'sm'} defaultValue={defaultValue} />
        {/* Fixed part of the prompt, shown below the editable text */}
        <Box>{Prompt_AgentQA.fixedText}</Box>
      </ModalBody>
      <ModalFooter>
        <Button onClick={handleConfirm}>{t('common:common.Confirm')}</Button>
      </ModalFooter>
    </MyModal>
  );
};

View File

@@ -85,9 +85,13 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
value: t(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label as any)
},
{
label: t('common:core.dataset.collection.metadata.Chunk Size'),
label: t('dataset:chunk_size'),
value: collection.chunkSize || '-'
},
{
label: t('dataset:index_size'),
value: collection.indexSize || '-'
},
...(webSelector
? [
{

View File

@@ -0,0 +1,53 @@
import { NextAPI } from '@/service/middleware/entry';
import { retryFn } from '@fastgpt/global/common/system/utils';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { upsertWebsiteSyncJobScheduler } from '@fastgpt/service/core/dataset/websiteSync';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { addHours } from 'date-fns';
import { NextApiRequest, NextApiResponse } from 'next';
/**
 * Init script for the website sync refactor.
 *
 * 1. Loads every website dataset.
 * 2. For each one with `autoSync` enabled, upserts a website sync job
 *    scheduler with a random offset so the jobs do not all share one time.
 * 3. Unsets `nextSyncTime` on the collections of all website datasets.
 *
 * Both the scheduler upserts and the collection update go through `retryFn`.
 */
const initWebsiteSyncData = async () => {
  // Find all website datasets
  const datasets = await MongoDataset.find({ type: DatasetTypeEnum.websiteDataset }).lean();

  console.log('更新站点同步的定时器');
  // Add a scheduler for every website dataset that has auto sync enabled
  await Promise.all(
    datasets
      .filter((dataset) => dataset.autoSync)
      .map((dataset) => {
        // Pick a random time 1-24 hours from now. The previous `* 23` only
        // produced 1-23 hours, which did not match the documented range.
        const time = addHours(new Date(), Math.floor(Math.random() * 24) + 1);
        // NOTE(review): the second argument is presumably the first-run
        // timestamp in milliseconds — confirm against upsertWebsiteSyncJobScheduler.
        return retryFn(() =>
          upsertWebsiteSyncJobScheduler({ datasetId: String(dataset._id) }, time.getTime())
        );
      })
  );

  console.log('移除站点同步集合的定时器');
  // Remove nextSyncTime from the collections of all website datasets.
  // Use an explicit $in instead of passing raw arrays as equality values, so
  // the filter does not depend on implicit array casting.
  await retryFn(() =>
    MongoDatasetCollection.updateMany(
      {
        teamId: { $in: datasets.map((dataset) => dataset.teamId) },
        datasetId: { $in: datasets.map((dataset) => dataset._id) }
      },
      {
        $unset: {
          nextSyncTime: 1
        }
      }
    )
  );
};
/**
 * API entry point for the website sync init script.
 *
 * Authenticates the request with `authCert` (`authRoot: true`), runs
 * `initWebsiteSyncData`, and reports success. The response object is unused.
 */
async function handler(req: NextApiRequest, _res: NextApiResponse) {
  // Auth check runs first; the init script only runs once it resolves.
  await authCert({ req, authRoot: true });

  await initWebsiteSyncData();

  const result = { success: true };
  return result;
}
export default NextAPI(handler);

View File

@@ -9,6 +9,8 @@ import { OwnerPermissionVal } from '@fastgpt/global/support/permission/constant'
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { MongoDatasetCollectionTags } from '@fastgpt/service/core/dataset/tag/schema';
import { removeImageByPath } from '@fastgpt/service/common/file/image/controller';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { removeWebsiteSyncJobScheduler } from '@fastgpt/service/core/dataset/websiteSync';
async function handler(req: NextApiRequest) {
const { id: datasetId } = req.query as {
@@ -40,6 +42,13 @@ async function handler(req: NextApiRequest) {
datasetId: { $in: datasetIds }
});
await Promise.all(
datasets.map((dataset) => {
if (dataset.type === DatasetTypeEnum.websiteDataset)
return removeWebsiteSyncJobScheduler(String(dataset._id));
})
);
// delete all dataset.data and pg data
await mongoSessionRun(async (session) => {
// delete dataset data

View File

@@ -5,6 +5,8 @@ import { NextAPI } from '@/service/middleware/entry';
import { DatasetItemType } from '@fastgpt/global/core/dataset/type';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { getWebsiteSyncDatasetStatus } from '@fastgpt/service/core/dataset/websiteSync';
import { DatasetStatusEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
type Query = {
id: string;
@@ -28,8 +30,17 @@ async function handler(req: ApiRequestProps<Query>): Promise<DatasetItemType> {
per: ReadPermissionVal
});
const status = await (async () => {
if (dataset.type === DatasetTypeEnum.websiteDataset) {
return await getWebsiteSyncDatasetStatus(datasetId);
}
return DatasetStatusEnum.active;
})();
return {
...dataset,
status,
apiServer: dataset.apiServer
? {
baseUrl: dataset.apiServer.baseUrl,

View File

@@ -30,6 +30,13 @@ import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection
import { addDays } from 'date-fns';
import { refreshSourceAvatar } from '@fastgpt/service/common/file/image/controller';
import { MongoResourcePermission } from '@fastgpt/service/support/permission/schema';
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import {
removeWebsiteSyncJobScheduler,
upsertWebsiteSyncJobScheduler
} from '@fastgpt/service/core/dataset/websiteSync';
import { delDatasetRelevantData } from '@fastgpt/service/core/dataset/controller';
import { isEqual } from 'lodash';
export type DatasetUpdateQuery = {};
export type DatasetUpdateResponse = any;
@@ -62,8 +69,8 @@ async function handler(
apiServer,
yuqueServer,
feishuServer,
status,
autoSync
autoSync,
chunkSettings
} = req.body;
if (!id) {
@@ -114,6 +121,39 @@ async function handler(
});
const onUpdate = async (session: ClientSession) => {
// Website dataset update chunkSettings, need to clean up dataset
if (
dataset.type === DatasetTypeEnum.websiteDataset &&
chunkSettings &&
dataset.chunkSettings &&
!isEqual(
{
imageIndex: dataset.chunkSettings.imageIndex,
autoIndexes: dataset.chunkSettings.autoIndexes,
trainingType: dataset.chunkSettings.trainingType,
chunkSettingMode: dataset.chunkSettings.chunkSettingMode,
chunkSplitMode: dataset.chunkSettings.chunkSplitMode,
chunkSize: dataset.chunkSettings.chunkSize,
chunkSplitter: dataset.chunkSettings.chunkSplitter,
indexSize: dataset.chunkSettings.indexSize,
qaPrompt: dataset.chunkSettings.qaPrompt
},
{
imageIndex: chunkSettings.imageIndex,
autoIndexes: chunkSettings.autoIndexes,
trainingType: chunkSettings.trainingType,
chunkSettingMode: chunkSettings.chunkSettingMode,
chunkSplitMode: chunkSettings.chunkSplitMode,
chunkSize: chunkSettings.chunkSize,
chunkSplitter: chunkSettings.chunkSplitter,
indexSize: chunkSettings.indexSize,
qaPrompt: chunkSettings.qaPrompt
}
)
) {
await delDatasetRelevantData({ datasets: [dataset], session });
}
await MongoDataset.findByIdAndUpdate(
id,
{
@@ -123,7 +163,7 @@ async function handler(
...(agentModel && { agentModel }),
...(vlmModel && { vlmModel }),
...(websiteConfig && { websiteConfig }),
...(status && { status }),
...(chunkSettings && { chunkSettings }),
...(intro !== undefined && { intro }),
...(externalReadUrl !== undefined && { externalReadUrl }),
...(!!apiServer?.baseUrl && { 'apiServer.baseUrl': apiServer.baseUrl }),
@@ -143,8 +183,7 @@ async function handler(
{ session }
);
await updateSyncSchedule({
teamId: dataset.teamId,
datasetId: dataset._id,
dataset,
autoSync,
session
});
@@ -221,45 +260,54 @@ const updateTraining = async ({
};
/**
 * Apply a change of the dataset's `autoSync` flag.
 *
 * - Does nothing when `autoSync` is not a boolean (the flag was not part of
 *   the update).
 * - Website datasets: upsert or remove the website sync job scheduler.
 * - Other datasets: set `nextSyncTime` to one day from now on the dataset's
 *   apiFile/link collections when enabling, or unset `nextSyncTime` on all of
 *   the dataset's collections when disabling. Both run inside `session`.
 *
 * The scheduler calls are awaited so that a failure rejects this function
 * instead of surfacing as an unhandled rejection after it has returned.
 */
const updateSyncSchedule = async ({
  dataset,
  autoSync,
  session
}: {
  dataset: DatasetSchemaType;
  autoSync?: boolean;
  session: ClientSession;
}) => {
  if (typeof autoSync !== 'boolean') return;

  if (dataset.type === DatasetTypeEnum.websiteDataset) {
    // Website dataset: sync is driven by a job scheduler, not by collection fields
    if (autoSync) {
      // upsert Job Scheduler
      await upsertWebsiteSyncJobScheduler({ datasetId: String(dataset._id) });
    } else {
      // remove Job Scheduler
      await removeWebsiteSyncJobScheduler(String(dataset._id));
    }
    return;
  }

  // Other dataset, update the collection sync
  if (autoSync) {
    await MongoDatasetCollection.updateMany(
      {
        teamId: dataset.teamId,
        datasetId: dataset._id,
        type: { $in: [DatasetCollectionTypeEnum.apiFile, DatasetCollectionTypeEnum.link] }
      },
      {
        $set: {
          nextSyncTime: addDays(new Date(), 1)
        }
      },
      { session }
    );
  } else {
    await MongoDatasetCollection.updateMany(
      {
        teamId: dataset.teamId,
        datasetId: dataset._id
      },
      {
        $unset: {
          nextSyncTime: 1
        }
      },
      { session }
    );
  }
};

View File

@@ -47,7 +47,6 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
avatar: '/icon/logo.svg',
name: '',
intro: '',
status: 'active',
vectorModel: defaultVectorModels[0].model,
agentModel: defaultQAModels[0].model,
inheritPermission: true